1/*
2 * Copyright (C) 2000 Peter Kelly <pmk@post.com>
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
5 * Copyright (C) 2007 Samuel Weinig <sam@webkit.org>
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
10 *
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Library General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 * Library General Public License for more details.
20 *
21 * You should have received a copy of the GNU Library General Public License
22 * along with this library; see the file COPYING.LIB.  If not, write to
23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 * Boston, MA 02110-1301, USA.
25 */
26
27#include "config.h"
28#include "XMLDocumentParser.h"
29
30#include "CDATASection.h"
31#include "CachedScript.h"
32#include "Comment.h"
33#include "CachedResourceLoader.h"
34#include "Document.h"
35#include "DocumentFragment.h"
36#include "DocumentType.h"
37#include "ExceptionCodePlaceholder.h"
38#include "Frame.h"
39#include "FrameLoader.h"
40#include "FrameView.h"
41#include "HTMLEntityParser.h"
42#include "HTMLHtmlElement.h"
43#include "HTMLLinkElement.h"
44#include "HTMLNames.h"
45#include "HTMLStyleElement.h"
46#include "HTMLTemplateElement.h"
47#include "ProcessingInstruction.h"
48#include "ResourceError.h"
49#include "ResourceRequest.h"
50#include "ResourceResponse.h"
51#include "ScriptElement.h"
52#include "ScriptSourceCode.h"
53#include "ScriptValue.h"
54#include "SecurityOrigin.h"
55#include "TextResourceDecoder.h"
56#include "TransformSource.h"
57#include "XMLNSNames.h"
58#include "XMLDocumentParserScope.h"
59#include <libxml/parser.h>
60#include <libxml/parserInternals.h>
61#include <wtf/text/CString.h>
62#include <wtf/StringExtras.h>
63#include <wtf/Threading.h>
64#include <wtf/Vector.h>
65#include <wtf/unicode/UTF8.h>
66
67#if ENABLE(XSLT)
68#include "XMLTreeViewer.h"
69#include <libxslt/xslt.h>
70#endif
71
72using namespace std;
73
74namespace WebCore {
75
76class PendingCallbacks {
77    WTF_MAKE_NONCOPYABLE(PendingCallbacks); WTF_MAKE_FAST_ALLOCATED;
78public:
79    ~PendingCallbacks() { }
80    static PassOwnPtr<PendingCallbacks> create()
81    {
82        return adoptPtr(new PendingCallbacks);
83    }
84
85    void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
86                                      const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
87    {
88        OwnPtr<PendingStartElementNSCallback> callback = adoptPtr(new PendingStartElementNSCallback);
89
90        callback->xmlLocalName = xmlStrdup(xmlLocalName);
91        callback->xmlPrefix = xmlStrdup(xmlPrefix);
92        callback->xmlURI = xmlStrdup(xmlURI);
93        callback->nb_namespaces = nb_namespaces;
94        callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
95        for (int i = 0; i < nb_namespaces * 2 ; i++)
96            callback->namespaces[i] = xmlStrdup(namespaces[i]);
97        callback->nb_attributes = nb_attributes;
98        callback->nb_defaulted = nb_defaulted;
99        callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
100        for (int i = 0; i < nb_attributes; i++) {
101            // Each attribute has 5 elements in the array:
102            // name, prefix, uri, value and an end pointer.
103
104            for (int j = 0; j < 3; j++)
105                callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
106
107            int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
108
109            callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
110            callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
111        }
112
113        m_callbacks.append(callback.release());
114    }
115
116    void appendEndElementNSCallback()
117    {
118        m_callbacks.append(adoptPtr(new PendingEndElementNSCallback));
119    }
120
121    void appendCharactersCallback(const xmlChar* s, int len)
122    {
123        OwnPtr<PendingCharactersCallback> callback = adoptPtr(new PendingCharactersCallback);
124
125        callback->s = xmlStrndup(s, len);
126        callback->len = len;
127
128        m_callbacks.append(callback.release());
129    }
130
131    void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
132    {
133        OwnPtr<PendingProcessingInstructionCallback> callback = adoptPtr(new PendingProcessingInstructionCallback);
134
135        callback->target = xmlStrdup(target);
136        callback->data = xmlStrdup(data);
137
138        m_callbacks.append(callback.release());
139    }
140
141    void appendCDATABlockCallback(const xmlChar* s, int len)
142    {
143        OwnPtr<PendingCDATABlockCallback> callback = adoptPtr(new PendingCDATABlockCallback);
144
145        callback->s = xmlStrndup(s, len);
146        callback->len = len;
147
148        m_callbacks.append(callback.release());
149    }
150
151    void appendCommentCallback(const xmlChar* s)
152    {
153        OwnPtr<PendingCommentCallback> callback = adoptPtr(new PendingCommentCallback);
154
155        callback->s = xmlStrdup(s);
156
157        m_callbacks.append(callback.release());
158    }
159
160    void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
161    {
162        OwnPtr<PendingInternalSubsetCallback> callback = adoptPtr(new PendingInternalSubsetCallback);
163
164        callback->name = xmlStrdup(name);
165        callback->externalID = xmlStrdup(externalID);
166        callback->systemID = xmlStrdup(systemID);
167
168        m_callbacks.append(callback.release());
169    }
170
171    void appendErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
172    {
173        OwnPtr<PendingErrorCallback> callback = adoptPtr(new PendingErrorCallback);
174
175        callback->message = xmlStrdup(message);
176        callback->type = type;
177        callback->lineNumber = lineNumber;
178        callback->columnNumber = columnNumber;
179
180        m_callbacks.append(callback.release());
181    }
182
183    void callAndRemoveFirstCallback(XMLDocumentParser* parser)
184    {
185        OwnPtr<PendingCallback> callback = m_callbacks.takeFirst();
186        callback->call(parser);
187    }
188
189    bool isEmpty() const { return m_callbacks.isEmpty(); }
190
191private:
192    PendingCallbacks() { }
193
194    struct PendingCallback {
195        virtual ~PendingCallback() { }
196        virtual void call(XMLDocumentParser* parser) = 0;
197    };
198
199    struct PendingStartElementNSCallback : public PendingCallback {
200        virtual ~PendingStartElementNSCallback()
201        {
202            xmlFree(xmlLocalName);
203            xmlFree(xmlPrefix);
204            xmlFree(xmlURI);
205            for (int i = 0; i < nb_namespaces * 2; i++)
206                xmlFree(namespaces[i]);
207            xmlFree(namespaces);
208            for (int i = 0; i < nb_attributes; i++)
209                for (int j = 0; j < 4; j++)
210                    xmlFree(attributes[i * 5 + j]);
211            xmlFree(attributes);
212        }
213
214        virtual void call(XMLDocumentParser* parser)
215        {
216            parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
217                                      nb_namespaces, const_cast<const xmlChar**>(namespaces),
218                                      nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
219        }
220
221        xmlChar* xmlLocalName;
222        xmlChar* xmlPrefix;
223        xmlChar* xmlURI;
224        int nb_namespaces;
225        xmlChar** namespaces;
226        int nb_attributes;
227        int nb_defaulted;
228        xmlChar** attributes;
229    };
230
231    struct PendingEndElementNSCallback : public PendingCallback {
232        virtual void call(XMLDocumentParser* parser)
233        {
234            parser->endElementNs();
235        }
236    };
237
238    struct PendingCharactersCallback : public PendingCallback {
239        virtual ~PendingCharactersCallback()
240        {
241            xmlFree(s);
242        }
243
244        virtual void call(XMLDocumentParser* parser)
245        {
246            parser->characters(s, len);
247        }
248
249        xmlChar* s;
250        int len;
251    };
252
253    struct PendingProcessingInstructionCallback : public PendingCallback {
254        virtual ~PendingProcessingInstructionCallback()
255        {
256            xmlFree(target);
257            xmlFree(data);
258        }
259
260        virtual void call(XMLDocumentParser* parser)
261        {
262            parser->processingInstruction(target, data);
263        }
264
265        xmlChar* target;
266        xmlChar* data;
267    };
268
269    struct PendingCDATABlockCallback : public PendingCallback {
270        virtual ~PendingCDATABlockCallback()
271        {
272            xmlFree(s);
273        }
274
275        virtual void call(XMLDocumentParser* parser)
276        {
277            parser->cdataBlock(s, len);
278        }
279
280        xmlChar* s;
281        int len;
282    };
283
284    struct PendingCommentCallback : public PendingCallback {
285        virtual ~PendingCommentCallback()
286        {
287            xmlFree(s);
288        }
289
290        virtual void call(XMLDocumentParser* parser)
291        {
292            parser->comment(s);
293        }
294
295        xmlChar* s;
296    };
297
298    struct PendingInternalSubsetCallback : public PendingCallback {
299        virtual ~PendingInternalSubsetCallback()
300        {
301            xmlFree(name);
302            xmlFree(externalID);
303            xmlFree(systemID);
304        }
305
306        virtual void call(XMLDocumentParser* parser)
307        {
308            parser->internalSubset(name, externalID, systemID);
309        }
310
311        xmlChar* name;
312        xmlChar* externalID;
313        xmlChar* systemID;
314    };
315
316    struct PendingErrorCallback: public PendingCallback {
317        virtual ~PendingErrorCallback()
318        {
319            xmlFree(message);
320        }
321
322        virtual void call(XMLDocumentParser* parser)
323        {
324            parser->handleError(type, reinterpret_cast<char*>(message), TextPosition(lineNumber, columnNumber));
325        }
326
327        XMLErrors::ErrorType type;
328        xmlChar* message;
329        OrdinalNumber lineNumber;
330        OrdinalNumber columnNumber;
331    };
332
333    Deque<OwnPtr<PendingCallback> > m_callbacks;
334};
335// --------------------------------
336
// Sentinel I/O context. openFunc() returns this variable's address instead of an
// OffsetBuffer when a load is refused; readFunc() and closeFunc() compare the context
// against that address to perform a 0-byte read and to skip deletion, respectively.
static int globalDescriptor = 0;
// Thread on which the libxml I/O callbacks were registered (assigned in the
// XMLParserContext factory functions); matchFunc() only claims loads made on this thread.
static ThreadIdentifier libxmlLoaderThread = 0;
339
340static int matchFunc(const char*)
341{
342    // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
343    // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
344    return XMLDocumentParserScope::currentCachedResourceLoader && currentThread() == libxmlLoaderThread;
345}
346
347class OffsetBuffer {
348    WTF_MAKE_FAST_ALLOCATED;
349public:
350    OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
351
352    int readOutBytes(char* outputBuffer, unsigned askedToRead)
353    {
354        unsigned bytesLeft = m_buffer.size() - m_currentOffset;
355        unsigned lenToCopy = min(askedToRead, bytesLeft);
356        if (lenToCopy) {
357            memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
358            m_currentOffset += lenToCopy;
359        }
360        return lenToCopy;
361    }
362
363private:
364    Vector<char> m_buffer;
365    unsigned m_currentOffset;
366};
367
368static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
369{
370    if (!scriptingContentIsAllowed(parserContentPolicy))
371        element->stripScriptingAttributes(attributeVector);
372    element->parserSetAttributes(attributeVector);
373}
374
375static void switchToUTF16(xmlParserCtxtPtr ctxt)
376{
377    // Hack around libxml2's lack of encoding overide support by manually
378    // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
379    // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
380    // and switch encodings, causing the parse to fail.
381
382    // FIXME: Can we just use XML_PARSE_IGNORE_ENC now?
383
384    const UChar BOM = 0xFEFF;
385    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
386    xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
387}
388
389static bool shouldAllowExternalLoad(const KURL& url)
390{
391    String urlString = url.string();
392
393    // On non-Windows platforms libxml asks for this URL, the
394    // "XML_XML_DEFAULT_CATALOG", on initialization.
395    if (urlString == "file:///etc/xml/catalog")
396        return false;
397
398    // On Windows, libxml computes a URL relative to where its DLL resides.
399    if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
400        return false;
401
402    // The most common DTD.  There isn't much point in hammering www.w3c.org
403    // by requesting this URL for every XHTML document.
404    if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
405        return false;
406
407    // Similarly, there isn't much point in requesting the SVG DTD.
408    if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
409        return false;
410
411    // The libxml doesn't give us a lot of context for deciding whether to
412    // allow this request.  In the worst case, this load could be for an
413    // external entity and the resulting document could simply read the
414    // retrieved content.  If we had more context, we could potentially allow
415    // the parser to load a DTD.  As things stand, we take the conservative
416    // route and allow same-origin requests only.
417    if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin()->canRequest(url)) {
418        XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
419        return false;
420    }
421
422    return true;
423}
424
425static void* openFunc(const char* uri)
426{
427    ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
428    ASSERT(currentThread() == libxmlLoaderThread);
429
430    KURL url(KURL(), uri);
431
432    if (!shouldAllowExternalLoad(url))
433        return &globalDescriptor;
434
435    ResourceError error;
436    ResourceResponse response;
437    Vector<char> data;
438
439
440    {
441        CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
442        XMLDocumentParserScope scope(0);
443        // FIXME: We should restore the original global error handler as well.
444
445        if (cachedResourceLoader->frame())
446            cachedResourceLoader->frame()->loader()->loadResourceSynchronously(url, AllowStoredCredentials, DoNotAskClientForCrossOriginCredentials, error, response, data);
447    }
448
449    // We have to check the URL again after the load to catch redirects.
450    // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
451    if (!shouldAllowExternalLoad(response.url()))
452        return &globalDescriptor;
453
454    return new OffsetBuffer(data);
455}
456
457static int readFunc(void* context, char* buffer, int len)
458{
459    // Do 0-byte reads in case of a null descriptor
460    if (context == &globalDescriptor)
461        return 0;
462
463    OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
464    return data->readOutBytes(buffer, len);
465}
466
// libxml "write" callback registered alongside the input callbacks. All three
// arguments are ignored and nothing is ever written.
static int writeFunc(void*, const char*, int)
{
    // Always just do 0-byte writes
    return 0;
}
472
473static int closeFunc(void* context)
474{
475    if (context != &globalDescriptor) {
476        OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
477        delete data;
478    }
479    return 0;
480}
481
#if ENABLE(XSLT)
// printf-style error handler that discards every message it is given.
// Only compiled when XSLT support is enabled.
static void errorFunc(void*, const char*, ...)
{
    // FIXME: It would be nice to display error messages somewhere.
}
#endif
488
489static bool didInit = false;
490
491PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
492{
493    if (!didInit) {
494        xmlInitParser();
495        xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
496        xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
497        libxmlLoaderThread = currentThread();
498        didInit = true;
499    }
500
501    xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
502    parser->_private = userData;
503
504    // Substitute entities.
505    xmlCtxtUseOptions(parser, XML_PARSE_NOENT);
506
507    switchToUTF16(parser);
508
509    return adoptRef(new XMLParserContext(parser));
510}
511
512
513// Chunk should be encoded in UTF-8
514PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
515{
516    if (!didInit) {
517        xmlInitParser();
518        xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
519        xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
520        libxmlLoaderThread = currentThread();
521        didInit = true;
522    }
523
524    // appendFragmentSource() checks that the length doesn't overflow an int.
525    xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
526
527    if (!parser)
528        return 0;
529
530    memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
531
532    // Substitute entities.
533    // FIXME: Why is XML_PARSE_NODICT needed? This is different from what createStringParser does.
534    xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
535
536    // Internal initialization
537    parser->sax2 = 1;
538    parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
539    parser->depth = 0;
540    parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
541    parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
542    parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
543    parser->_private = userData;
544
545    return adoptRef(new XMLParserContext(parser));
546}
547
548// --------------------------------
549
550bool XMLDocumentParser::supportsXMLVersion(const String& version)
551{
552    return version == "1.0";
553}
554
// Constructs a parser for a whole document. The document itself is the initial
// current node, m_parsingFragment is false, and no libxml context exists yet
// (doWrite() calls initializeParserContext() while m_context is still null).
XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
    : ScriptableDocumentParser(document)
    , m_view(frameView)
    , m_context(0)
    , m_pendingCallbacks(PendingCallbacks::create())
    , m_depthTriggeringEntityExpansion(-1) // -1 is the "no entity expansion in progress" value tested in startElementNs()
    , m_isParsingEntityDeclaration(false)
    , m_currentNode(document)
    , m_sawError(false)
    , m_sawCSS(false)
    , m_sawXSLTransform(false)
    , m_sawFirstElement(false)
    , m_isXHTMLDocument(false)
    , m_parserPaused(false)
    , m_requestingScript(false)
    , m_finishCalled(false)
    , m_xmlErrors(document)
    , m_pendingScript(0)
    , m_scriptStartPosition(TextPosition::belowRangePosition())
    , m_parsingFragment(false)
{
}
577
578XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
579    : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
580    , m_view(0)
581    , m_context(0)
582    , m_pendingCallbacks(PendingCallbacks::create())
583    , m_depthTriggeringEntityExpansion(-1)
584    , m_isParsingEntityDeclaration(false)
585    , m_currentNode(fragment)
586    , m_sawError(false)
587    , m_sawCSS(false)
588    , m_sawXSLTransform(false)
589    , m_sawFirstElement(false)
590    , m_isXHTMLDocument(false)
591    , m_parserPaused(false)
592    , m_requestingScript(false)
593    , m_finishCalled(false)
594    , m_xmlErrors(fragment->document())
595    , m_pendingScript(0)
596    , m_scriptStartPosition(TextPosition::belowRangePosition())
597    , m_parsingFragment(true)
598{
599    fragment->ref();
600
601    // Add namespaces based on the parent node
602    Vector<Element*> elemStack;
603    while (parentElement) {
604        elemStack.append(parentElement);
605
606        ContainerNode* n = parentElement->parentNode();
607        if (!n || !n->isElementNode())
608            break;
609        parentElement = toElement(n);
610    }
611
612    if (elemStack.isEmpty())
613        return;
614
615    for (; !elemStack.isEmpty(); elemStack.removeLast()) {
616        Element* element = elemStack.last();
617        if (element->hasAttributes()) {
618            for (unsigned i = 0; i < element->attributeCount(); i++) {
619                const Attribute* attribute = element->attributeItem(i);
620                if (attribute->localName() == xmlnsAtom)
621                    m_defaultNamespaceURI = attribute->value();
622                else if (attribute->prefix() == xmlnsAtom)
623                    m_prefixToNamespaceMap.set(attribute->localName(), attribute->value());
624            }
625        }
626    }
627
628    // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
629    if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
630        m_defaultNamespaceURI = parentElement->namespaceURI();
631}
632
633XMLParserContext::~XMLParserContext()
634{
635    if (m_context->myDoc)
636        xmlFreeDoc(m_context->myDoc);
637    xmlFreeParserCtxt(m_context);
638}
639
640XMLDocumentParser::~XMLDocumentParser()
641{
642    // The XMLDocumentParser will always be detached before being destroyed.
643    ASSERT(m_currentNodeStack.isEmpty());
644    ASSERT(!m_currentNode);
645
646    // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
647    if (m_pendingScript)
648        m_pendingScript->removeClient(this);
649}
650
651void XMLDocumentParser::doWrite(const String& parseString)
652{
653    ASSERT(!isDetached());
654    if (!m_context)
655        initializeParserContext();
656
657    // Protect the libxml context from deletion during a callback
658    RefPtr<XMLParserContext> context = m_context;
659
660    // libXML throws an error if you try to switch the encoding for an empty string.
661    if (parseString.length()) {
662        // JavaScript may cause the parser to detach during xmlParseChunk
663        // keep this alive until this function is done.
664        RefPtr<XMLDocumentParser> protect(this);
665
666        switchToUTF16(context->context());
667        XMLDocumentParserScope scope(document()->cachedResourceLoader());
668        xmlParseChunk(context->context(), reinterpret_cast<const char*>(parseString.characters()), sizeof(UChar) * parseString.length(), 0);
669
670        // JavaScript (which may be run under the xmlParseChunk callstack) may
671        // cause the parser to be stopped or detached.
672        if (isStopped())
673            return;
674    }
675
676    // FIXME: Why is this here?  And why is it after we process the passed source?
677    if (document()->decoder() && document()->decoder()->sawError()) {
678        // If the decoder saw an error, report it as fatal (stops parsing)
679        TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
680        handleError(XMLErrors::fatal, "Encoding error", position);
681    }
682}
683
684static inline String toString(const xmlChar* string, size_t size)
685{
686    return String::fromUTF8(reinterpret_cast<const char*>(string), size);
687}
688
689static inline String toString(const xmlChar* string)
690{
691    return String::fromUTF8(reinterpret_cast<const char*>(string));
692}
693
694static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
695{
696    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
697}
698
699static inline AtomicString toAtomicString(const xmlChar* string)
700{
701    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
702}
703
// Overlay for one entry of the flat namespace array passed to startElementNs():
// handleNamespaceAttributes() reinterprets that array as consecutive
// (prefix, uri) pointer pairs of this type.
struct _xmlSAX2Namespace {
    const xmlChar* prefix; // Null for a default-namespace declaration (handled as plain "xmlns").
    const xmlChar* uri;
};
typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
709
710static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec)
711{
712    xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
713    for (int i = 0; i < nb_namespaces; i++) {
714        AtomicString namespaceQName = xmlnsAtom;
715        AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
716        if (namespaces[i].prefix)
717            namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
718
719        QualifiedName parsedName = anyName;
720        if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, ec))
721            return;
722
723        prefixedAttributes.append(Attribute(parsedName, namespaceURI));
724    }
725}
726
// Overlay for one entry of the flat attribute array passed to startElementNs():
// handleElementAttributes() reinterprets that array as consecutive groups of
// five pointers of this type.
struct _xmlSAX2Attributes {
    const xmlChar* localname;
    const xmlChar* prefix;
    const xmlChar* uri;
    const xmlChar* value; // Start of the value bytes.
    const xmlChar* end; // One past the last value byte; the length is computed as end - value.
};
typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
735
736static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec)
737{
738    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
739    for (int i = 0; i < nb_attributes; i++) {
740        int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
741        AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
742        String attrPrefix = toString(attributes[i].prefix);
743        AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
744        AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
745
746        QualifiedName parsedName = anyName;
747        if (!Element::parseAttributeName(parsedName, attrURI, attrQName, ec))
748            return;
749
750        prefixedAttributes.append(Attribute(parsedName, attrValue));
751    }
752}
753
754// This is a hack around https://bugzilla.gnome.org/show_bug.cgi?id=502960
755// Otherwise libxml doesn't include namespace for parsed entities, breaking entity
756// expansion for all entities containing elements.
// Returns true when the libxml in use predates 2.7.4 and the entity-namespace
// workaround is therefore still required.
static inline bool hackAroundLibXMLEntityParsingBug()
{
#if LIBXML_VERSION < 20704
    return true;
#else
    // libxml 2.7.4 fixed the bug, so no workaround is needed.
    return false;
#endif
}
766
// SAX2 start-element handler. Creates an element for the given name, installs its
// namespace declarations and attributes, appends it to the current node and makes it
// (or, for <template>, its content) the new current node.
//
// xmlLocalName / xmlPrefix / xmlURI: the element's name parts as UTF-8 libxml strings.
// libxmlNamespaces: nb_namespaces (prefix, URI) pairs declared on the element.
// libxmlAttributes: nb_attributes groups of five pointers (see _xmlSAX2Attributes).
// nb_defaulted: only forwarded to the pending-callback queue; not otherwise used here.
//
// Does nothing when the parser is stopped; queues the event when the parser is paused.
void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
                                  const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
{
    if (isStopped())
        return;

    // While paused, record the event (with copies of all strings) for later replay.
    if (m_parserPaused) {
        m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
                                                         nb_attributes, nb_defaulted, libxmlAttributes);
        return;
    }

    exitText();

    AtomicString localName = toAtomicString(xmlLocalName);
    AtomicString uri = toAtomicString(xmlURI);
    AtomicString prefix = toAtomicString(xmlPrefix);

    // For fragments, libxml may not know the namespace; fall back to the declarations
    // collected from the context element chain in the fragment constructor.
    if (m_parsingFragment && uri.isNull()) {
        if (!prefix.isNull())
            uri = m_prefixToNamespaceMap.get(prefix);
        else
            uri = m_defaultNamespaceURI;
    }

    // If libxml entity parsing is broken, transfer the currentNodes' namespaceURI to the new node,
    // if we're currently expanding elements which originate from an entity declaration.
    if (hackAroundLibXMLEntityParsingBug() && depthTriggeringEntityExpansion() != -1 && context()->depth > depthTriggeringEntityExpansion() && uri.isNull() && prefix.isNull())
        uri = m_currentNode->namespaceURI();

    // Remember whether this is the first element seen so the frame loader can be told below.
    bool isFirstElement = !m_sawFirstElement;
    m_sawFirstElement = true;

    QualifiedName qName(prefix, localName, uri);
    RefPtr<Element> newElement = m_currentNode->document()->createElement(qName, true);
    if (!newElement) {
        stopParsing();
        return;
    }

    Vector<Attribute> prefixedAttributes;
    ExceptionCode ec = 0;
    handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nb_namespaces, ec);
    if (ec) {
        // Install whatever was collected before the failure, then give up.
        setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
        stopParsing();
        return;
    }

    handleElementAttributes(prefixedAttributes, libxmlAttributes, nb_attributes, ec);
    setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
    if (ec) {
        stopParsing();
        return;
    }

    newElement->beginParsingChildren();

    // Record where a script element starts; endElementNs() passes this position to prepareScript().
    ScriptElement* scriptElement = toScriptElementIfPossible(newElement.get());
    if (scriptElement)
        m_scriptStartPosition = textPosition();

    m_currentNode->parserAppendChild(newElement.get());

    // Keep the parent around: pushCurrentNode() below changes m_currentNode.
    const ContainerNode* currentNode = m_currentNode;
#if ENABLE(TEMPLATE_ELEMENT)
    // Children of a <template> are parsed into its content rather than into the element itself.
    if (newElement->hasTagName(HTMLNames::templateTag))
        pushCurrentNode(toHTMLTemplateElement(newElement.get())->content());
    else
        pushCurrentNode(newElement.get());
#else
    pushCurrentNode(newElement.get());
#endif

    // Attach the new element only when there is a view, the parent is attached,
    // and the element has not already been attached.
    if (m_view && currentNode->attached() && !newElement->attached())
        newElement->attach();

    if (newElement->hasTagName(HTMLNames::htmlTag))
        static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser();

    // Notify the frame loader once, for the first element of a (non-fragment) document.
    if (!m_parsingFragment && isFirstElement && document()->frame())
        document()->frame()->loader()->dispatchDocumentElementAvailable();
}
850
// SAX end-of-element callback. Finishes parsing of the node on top of the
// open-node stack and pops it. When that node is a script element inside a
// document that has a view, the script is prepared and then either executed
// immediately or waited for (pausing the parser) before the node is popped.
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    // While paused, queue the event; resumeParsing() replays queued callbacks.
    if (m_parserPaused) {
        m_pendingCallbacks->appendEndElementNSCallback();
        return;
    }

    // JavaScript can detach the parser.  Make sure this is not released
    // before the end of this method.
    RefPtr<XMLDocumentParser> protect(this);

    exitText();

    // Keep our own reference to the node being closed: the paths below pop it
    // from the stack and still use it afterwards.
    RefPtr<ContainerNode> n = m_currentNode;
    n->finishParsingChildren();

    // Once we reach the depth again where entity expansion started, stop executing the work-around.
    if (hackAroundLibXMLEntityParsingBug() && context()->depth <= depthTriggeringEntityExpansion())
        setDepthTriggeringEntityExpansion(-1);

    // Scripting content is not allowed by the content policy: drop script elements from the tree.
    if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptElementIfPossible(toElement(n.get()))) {
        popCurrentNode();
        n->remove(IGNORE_EXCEPTION);
        return;
    }

    // Non-element nodes, and parsers without a view, never reach the script handling below.
    if (!n->isElementNode() || !m_view) {
        popCurrentNode();
        return;
    }

    Element* element = toElement(n.get());

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
    if (!element->inDocument()) {
        popCurrentNode();
        return;
    }

    // Everything past this point only applies to script elements.
    ScriptElement* scriptElement = toScriptElementIfPossible(element);
    if (!scriptElement) {
        popCurrentNode();
        return;
    }

    // Don't load external scripts for standalone documents (for now).
    ASSERT(!m_pendingScript);
    m_requestingScript = true;

    if (scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute)) {
        // FIXME: Script execution should be shared between
        // the libxml2 and Qt XMLDocumentParser implementations.

        if (scriptElement->readyToBeParserExecuted())
            // Run the script right away from the element's own content.
            scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition));
        else if (scriptElement->willBeParserExecuted()) {
            // Register as a client of the cached script and remember which element it belongs to.
            m_pendingScript = scriptElement->cachedScript();
            m_scriptElement = element;
            m_pendingScript->addClient(this);

            // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
            if (m_pendingScript)
                pauseParsing();
        } else
            m_scriptElement = 0;

        // JavaScript may have detached the parser
        // NOTE: this early return leaves m_requestingScript set and does not pop the current node.
        if (isDetached())
            return;
    }
    m_requestingScript = false;
    popCurrentNode();
}
928
929void XMLDocumentParser::characters(const xmlChar* s, int len)
930{
931    if (isStopped())
932        return;
933
934    if (m_parserPaused) {
935        m_pendingCallbacks->appendCharactersCallback(s, len);
936        return;
937    }
938
939    if (!m_leafTextNode)
940        enterText();
941    m_bufferedText.append(s, len);
942}
943
// Formats a printf-style diagnostic (|message| with |args|) and either reports
// it immediately through handleError() or, while the parser is paused, queues
// it together with the current line and column. Does nothing once the parser
// has been stopped.
void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
{
    if (isStopped())
        return;

#if HAVE(VASPRINTF)
    // vasprintf() allocates the formatted string; it is released with free() below.
    char* m;
    if (vasprintf(&m, message, args) == -1)
        return;
#else
    // No vasprintf() available: format into a fixed-size stack buffer, so
    // overly long messages are truncated.
    char m[1024];
    vsnprintf(m, sizeof(m) - 1, message, args);
#endif

    // NOTE(review): |m| does not outlive this function, so the queued callback
    // presumably copies the message - confirm in PendingCallbacks.
    if (m_parserPaused)
        m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), lineNumber(), columnNumber());
    else
        handleError(type, m, textPosition());

#if HAVE(VASPRINTF)
    free(m);
#endif
}
967
968void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
969{
970    if (isStopped())
971        return;
972
973    if (m_parserPaused) {
974        m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
975        return;
976    }
977
978    exitText();
979
980    // ### handle exceptions
981    ExceptionCode ec = 0;
982    RefPtr<ProcessingInstruction> pi = m_currentNode->document()->createProcessingInstruction(
983        toString(target), toString(data), ec);
984    if (ec)
985        return;
986
987    pi->setCreatedByParser(true);
988
989    m_currentNode->parserAppendChild(pi.get());
990    if (m_view && !pi->attached())
991        pi->attach();
992
993    pi->finishParsingChildren();
994
995    if (pi->isCSS())
996        m_sawCSS = true;
997#if ENABLE(XSLT)
998    m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
999    if (m_sawXSLTransform && !document()->transformSourceDocument())
1000        stopParsing();
1001#endif
1002}
1003
1004void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
1005{
1006    if (isStopped())
1007        return;
1008
1009    if (m_parserPaused) {
1010        m_pendingCallbacks->appendCDATABlockCallback(s, len);
1011        return;
1012    }
1013
1014    exitText();
1015
1016    RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), toString(s, len));
1017    m_currentNode->parserAppendChild(newNode.get());
1018    if (m_view && !newNode->attached())
1019        newNode->attach();
1020}
1021
1022void XMLDocumentParser::comment(const xmlChar* s)
1023{
1024    if (isStopped())
1025        return;
1026
1027    if (m_parserPaused) {
1028        m_pendingCallbacks->appendCommentCallback(s);
1029        return;
1030    }
1031
1032    exitText();
1033
1034    RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), toString(s));
1035    m_currentNode->parserAppendChild(newNode.get());
1036    if (m_view && !newNode->attached())
1037        newNode->attach();
1038}
1039
// Names for the integer |standalone| value that startDocument() receives
// (startDocumentHandler() passes the parser context's |standalone| field).
// The enumerators are consecutive, starting at -2.
enum StandaloneInfo {
    StandaloneUnspecified = -2, // -2: per the name, no standalone setting was given.
    NoXMlDeclaration, // -1: startDocument() records that the document has no XML declaration. (Identifier spelling "XMl" is historical.)
    StandaloneNo, // 0
    StandaloneYes // 1: the only value for which startDocument() marks the document standalone.
};
1046
1047void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
1048{
1049    StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
1050    if (standaloneInfo == NoXMlDeclaration) {
1051        document()->setHasXMLDeclaration(false);
1052        return;
1053    }
1054
1055    if (version)
1056        document()->setXMLVersion(toString(version), ASSERT_NO_EXCEPTION);
1057    if (standalone != StandaloneUnspecified)
1058        document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1059    if (encoding)
1060        document()->setXMLEncoding(toString(encoding));
1061    document()->setHasXMLDeclaration(true);
1062}
1063
// End-of-document callback: the only work is to close any text run that is
// still open (appendFragmentSource() also calls this for that purpose).
void XMLDocumentParser::endDocument()
{
    exitText();
}
1068
1069void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1070{
1071    if (isStopped())
1072        return;
1073
1074    if (m_parserPaused) {
1075        m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
1076        return;
1077    }
1078
1079    if (document())
1080        document()->parserAppendChild(DocumentType::create(document(), toString(name), toString(externalID), toString(systemID)));
1081}
1082
1083static inline XMLDocumentParser* getParser(void* closure)
1084{
1085    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1086    return static_cast<XMLDocumentParser*>(ctxt->_private);
1087}
1088
// This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
// Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
// Returns true when the calling SAX handler should ignore the current event.
static inline bool hackAroundLibXMLEntityBug(void* closure)
{
#if LIBXML_VERSION >= 20627
    UNUSED_PARAM(closure);

    // This bug has been fixed in libxml 2.6.27.
    return false;
#else
    // Older libxml: skip the event whenever the parser context's |node| pointer is non-null.
    return static_cast<xmlParserCtxtPtr>(closure)->node;
#endif
}
1102
1103static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
1104{
1105    if (hackAroundLibXMLEntityBug(closure))
1106        return;
1107
1108    getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
1109}
1110
1111static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1112{
1113    if (hackAroundLibXMLEntityBug(closure))
1114        return;
1115
1116    getParser(closure)->endElementNs();
1117}
1118
1119static void charactersHandler(void* closure, const xmlChar* s, int len)
1120{
1121    if (hackAroundLibXMLEntityBug(closure))
1122        return;
1123
1124    getParser(closure)->characters(s, len);
1125}
1126
1127static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1128{
1129    if (hackAroundLibXMLEntityBug(closure))
1130        return;
1131
1132    getParser(closure)->processingInstruction(target, data);
1133}
1134
1135static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
1136{
1137    if (hackAroundLibXMLEntityBug(closure))
1138        return;
1139
1140    getParser(closure)->cdataBlock(s, len);
1141}
1142
1143static void commentHandler(void* closure, const xmlChar* comment)
1144{
1145    if (hackAroundLibXMLEntityBug(closure))
1146        return;
1147
1148    getParser(closure)->comment(comment);
1149}
1150
// libxml2 warning callback: packs the variadic printf-style arguments into a
// va_list and reports them to the owning parser as XMLErrors::warning.
WTF_ATTRIBUTE_PRINTF(2, 3)
static void warningHandler(void* closure, const char* message, ...)
{
    va_list args;
    va_start(args, message);
    getParser(closure)->error(XMLErrors::warning, message, args);
    va_end(args);
}
1159
// libxml2 fatal-error callback: packs the variadic printf-style arguments
// into a va_list and reports them to the owning parser as XMLErrors::fatal.
WTF_ATTRIBUTE_PRINTF(2, 3)
static void fatalErrorHandler(void* closure, const char* message, ...)
{
    va_list args;
    va_start(args, message);
    getParser(closure)->error(XMLErrors::fatal, message, args);
    va_end(args);
}
1168
// libxml2 (recoverable) error callback: packs the variadic printf-style
// arguments into a va_list and reports them to the owning parser as
// XMLErrors::nonFatal.
WTF_ATTRIBUTE_PRINTF(2, 3)
static void normalErrorHandler(void* closure, const char* message, ...)
{
    va_list args;
    va_start(args, message);
    getParser(closure)->error(XMLErrors::nonFatal, message, args);
    va_end(args);
}
1177
// Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
// a hack to avoid malloc/free. Using a global variable like this could cause trouble
// if libxml implementation details were to change.
// This buffer holds the null-terminated UTF-8 text of the most recently
// resolved XHTML entity; every lookup overwrites it.
// NOTE(review): 9 bytes presumably covers two supplementary-plane code points
// (8 UTF-8 bytes) plus the terminator - confirm against the entity table.
static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
1182
1183static xmlEntityPtr sharedXHTMLEntity()
1184{
1185    static xmlEntity entity;
1186    if (!entity.type) {
1187        entity.type = XML_ENTITY_DECL;
1188        entity.orig = sharedXHTMLEntityResult;
1189        entity.content = sharedXHTMLEntityResult;
1190        entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1191    }
1192    return &entity;
1193}
1194
1195static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
1196{
1197    const char* originalTarget = target;
1198    WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1199        utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1200    if (conversionResult != WTF::Unicode::conversionOK)
1201        return 0;
1202
1203    // Even though we must pass the length, libxml expects the entity string to be null terminated.
1204    ASSERT(target > originalTarget + 1);
1205    *target = '\0';
1206    return target - originalTarget;
1207}
1208
1209static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1210{
1211    UChar utf16DecodedEntity[4];
1212    size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1213    if (!numberOfCodeUnits)
1214        return 0;
1215
1216    ASSERT(numberOfCodeUnits <= 4);
1217    size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1218        reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1219    if (!entityLengthInUTF8)
1220        return 0;
1221
1222    xmlEntityPtr entity = sharedXHTMLEntity();
1223    entity->length = entityLengthInUTF8;
1224    entity->name = name;
1225    return entity;
1226}
1227
// libxml2 entityDecl callback: flags the parser as being inside an entity
// declaration (when the entity-parsing work-around is active) and then
// delegates to libxml2's default xmlSAX2EntityDecl().
static void entityDeclarationHandler(void* closure, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content)
{
    // Prevent the next call to getEntityHandler() from recording the entity expansion depth.
    // We're parsing the entity declaration, so there's no need to record anything.
    // We only need to record the depth if we're actually expanding the entity, when it's referenced.
    if (hackAroundLibXMLEntityParsingBug())
        getParser(closure)->setIsParsingEntityDeclaration(true);
    xmlSAX2EntityDecl(closure, name, type, publicId, systemId, content);
}
1237
// libxml2 getEntity callback. Looks |name| up, in order, among libxml2's
// predefined entities, the entities declared in the document and - for XHTML
// documents only - the named entities known to getXHTMLEntity(). Returns the
// entity found, or null. When the entity-parsing work-around is active it
// also maintains the parser's entity-expansion depth bookkeeping.
static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
{
    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);

    XMLDocumentParser* parser = getParser(closure);
    if (hackAroundLibXMLEntityParsingBug()) {
        if (parser->isParsingEntityDeclaration()) {
            // We're parsing the entity declarations (not an entity reference), no need to do anything special.
            // The flag was set by entityDeclarationHandler(); clear it again.
            parser->setIsParsingEntityDeclaration(false);
            ASSERT(parser->depthTriggeringEntityExpansion() == -1);
        } else {
            // The entity will be used and eventually expanded. Record the current parser depth
            // so the next call to startElementNs() knows that the new element originates from
            // an entity declaration.
            parser->setDepthTriggeringEntityExpansion(ctxt->depth);
        }
    }

    // 1. Predefined entities known to libxml2.
    xmlEntityPtr ent = xmlGetPredefinedEntity(name);
    if (ent) {
        ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
        return ent;
    }

    // 2. Entities declared in the document; 3. XHTML named entities as a fallback.
    ent = xmlGetDocEntity(ctxt->myDoc, name);
    if (!ent && parser->isXHTMLDocument()) {
        ent = getXHTMLEntity(name);
        // Overrides the entity type that sharedXHTMLEntity() initialised the shared entity with.
        if (ent)
            ent->etype = XML_INTERNAL_GENERAL_ENTITY;
    }

    return ent;
}
1271
// libxml2 startDocument callback: calls switchToUTF16() on the context,
// passes the context's version, encoding and standalone values to the owning
// parser, then runs libxml2's default xmlSAX2StartDocument().
static void startDocumentHandler(void* closure)
{
    xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
    switchToUTF16(ctxt);
    getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
    xmlSAX2StartDocument(closure);
}
1279
// libxml2 endDocument callback: notifies the owning parser first, then runs
// libxml2's default xmlSAX2EndDocument().
static void endDocumentHandler(void* closure)
{
    getParser(closure)->endDocument();
    xmlSAX2EndDocument(closure);
}
1285
// libxml2 internalSubset callback: notifies the owning parser first, then
// runs libxml2's default xmlSAX2InternalSubset() with the same arguments.
static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
{
    getParser(closure)->internalSubset(name, externalID, systemID);
    xmlSAX2InternalSubset(closure, name, externalID, systemID);
}
1291
1292static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1293{
1294    String extId = toString(externalId);
1295    if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1296        || (extId == "-//W3C//DTD XHTML 1.1//EN")
1297        || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1298        || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1299        || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1300        || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1301        || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1302        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1303        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN")
1304        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"))
1305        getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1306}
1307
// Deliberately empty libxml2 ignorableWhitespace callback; it is installed
// only so that the handler slot is not left null (see the bugs below).
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // nothing to do, but we need this to work around a crasher
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1314
// Builds the SAX handler table, resets the per-parse state flags and creates
// the libxml2 parser context in m_context. When parsing a fragment, |chunk|
// is given to createMemoryParser(); otherwise createStringParser() is used
// and |chunk| is expected to hold no data.
void XMLDocumentParser::initializeParserContext(const CString& chunk)
{
    // Start from an all-null handler table and install only the callbacks we implement.
    xmlSAXHandler sax;
    memset(&sax, 0, sizeof(sax));

    sax.error = normalErrorHandler;
    sax.fatalError = fatalErrorHandler;
    sax.characters = charactersHandler;
    sax.processingInstruction = processingInstructionHandler;
    sax.cdataBlock = cdataBlockHandler;
    sax.comment = commentHandler;
    sax.warning = warningHandler;
    sax.startElementNs = startElementNsHandler;
    sax.endElementNs = endElementNsHandler;
    sax.getEntity = getEntityHandler;
    sax.startDocument = startDocumentHandler;
    sax.endDocument = endDocumentHandler;
    sax.internalSubset = internalSubsetHandler;
    sax.externalSubset = externalSubsetHandler;
    sax.ignorableWhitespace = ignorableWhitespaceHandler;
    sax.entityDecl = entityDeclarationHandler;
    // Marks the table as a SAX2 handler set; the namespace-aware element callbacks above are SAX2 ones.
    sax.initialized = XML_SAX2_MAGIC;
    DocumentParser::startParsing();
    // Reset the flags that doEnd() and processingInstruction() consult.
    m_sawError = false;
    m_sawCSS = false;
    m_sawXSLTransform = false;
    m_sawFirstElement = false;

    // The scope object stays alive while the context is created.
    XMLDocumentParserScope scope(document()->cachedResourceLoader());
    if (m_parsingFragment)
        m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
    else {
        ASSERT(!chunk.data());
        m_context = XMLParserContext::createStringParser(&sax, this);
    }
}
1351
// Finishes parsing: tells libxml2 that no more input will arrive and drops
// the parser context. With XSLT enabled it then either switches the document
// to the XML tree view or, when an XSL transform instruction was seen, hands
// the original source to the document as the transform source.
void XMLDocumentParser::doEnd()
{
    if (!isStopped()) {
        if (m_context) {
            // Tell libxml we're done.
            {
                XMLDocumentParserScope scope(document()->cachedResourceLoader());
                // Empty chunk with the terminate flag set.
                xmlParseChunk(context(), 0, 0, 1);
            }

            m_context = 0;
        }
    }

#if ENABLE(XSLT)
    // The tree view is used only when no error, CSS or XSL transform was seen
    // and the viewer reports that the document has no style information.
    XMLTreeViewer xmlTreeViewer(document());
    bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && xmlTreeViewer.hasNoStyleInformation();
    if (xmlViewerMode)
        xmlTreeViewer.transformDocumentToTreeView();

    if (m_sawXSLTransform) {
        // Re-parse the untouched source text so it can serve as the transform source.
        // NOTE(review): xmlDocPtrForString() returns 0 for an empty source; that
        // null is passed to TransformSource unchanged - confirm it is handled.
        void* doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform.toString(), document()->url().string());
        document()->setTransformSource(adoptPtr(new TransformSource(doc)));

        document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
        document()->styleResolverChanged(RecalcStyleImmediately);

        // styleResolverChanged() call can detach the parser and null out its document.
        // In that case, we just bail out.
        if (isDetached())
            return;

        document()->setParsing(true);
        DocumentParser::stopParsing();
    }
#endif
}
1389
1390#if ENABLE(XSLT)
// Parses |source| in one go with xmlReadMemory() and returns the resulting
// xmlDocPtr as a void*, or 0 when |source| is empty. |url| is passed to
// libxml2 as the document URL (converted to Latin-1).
void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url)
{
    if (source.isEmpty())
        return 0;

    // Parse in a single chunk into an xmlDocPtr
    // FIXME: Hook up error handlers so that a failure to parse the main document results in
    // good error messages.
    // Detect the host byte order: the first byte in memory of the 16-bit value
    // 0xFEFF is 0xFF on a little-endian machine. The raw UTF-16 buffer is
    // handed to libxml2 with the matching encoding name.
    const UChar BOM = 0xFEFF;
    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);

    XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0);
    // The length argument is in bytes, hence the multiplication by sizeof(UChar).
    xmlDocPtr sourceDoc = xmlReadMemory(reinterpret_cast<const char*>(source.characters()),
                                        source.length() * sizeof(UChar),
                                        url.latin1().data(),
                                        BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE",
                                        XSLT_PARSE_OPTIONS);
    return sourceDoc;
}
1410#endif
1411
1412OrdinalNumber XMLDocumentParser::lineNumber() const
1413{
1414    return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
1415}
1416
1417OrdinalNumber XMLDocumentParser::columnNumber() const
1418{
1419    return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
1420}
1421
1422TextPosition XMLDocumentParser::textPosition() const
1423{
1424    xmlParserCtxtPtr context = this->context();
1425    if (!context)
1426        return TextPosition::minimumPosition();
1427    return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
1428                        OrdinalNumber::fromOneBasedInt(context->input->col));
1429}
1430
1431void XMLDocumentParser::stopParsing()
1432{
1433    DocumentParser::stopParsing();
1434    if (context())
1435        xmlStopParser(context());
1436}
1437
// Resumes a paused parser: replays the SAX callbacks queued while paused,
// then feeds the source text that accumulated in m_pendingSrc, and finally
// ends the parse if finish() was already requested. Returns early whenever a
// replayed callback pauses the parser again.
void XMLDocumentParser::resumeParsing()
{
    ASSERT(!isDetached());
    ASSERT(m_parserPaused);

    m_parserPaused = false;

    // First, execute any pending callbacks
    while (!m_pendingCallbacks->isEmpty()) {
        m_pendingCallbacks->callAndRemoveFirstCallback(this);

        // A callback paused the parser
        if (m_parserPaused)
            return;
    }

    // Then, write any pending data
    // Copy the pending source and clear the member before calling append().
    SegmentedString rest = m_pendingSrc;
    m_pendingSrc.clear();
    // There is normally only one string left, so toString() shouldn't copy.
    // In any case, the XML parser runs on the main thread and it's OK if
    // the passed string has more than one reference.
    append(rest.toString().impl());

    // Finally, if finish() has been called and write() didn't result
    // in any further callbacks being queued, call end()
    if (m_finishCalled && m_pendingCallbacks->isEmpty())
        end();
}
1467
1468bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1469{
1470    ASSERT(!m_context);
1471    ASSERT(m_parsingFragment);
1472
1473    CString chunkAsUtf8 = chunk.utf8();
1474
1475    // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
1476    if (chunkAsUtf8.length() > INT_MAX)
1477        return false;
1478
1479    initializeParserContext(chunkAsUtf8);
1480    xmlParseContent(context());
1481    endDocument(); // Close any open text nodes.
1482
1483    // FIXME: If this code is actually needed, it should probably move to finish()
1484    // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1485    // Check if all the chunk has been processed.
1486    long bytesProcessed = xmlByteConsumed(context());
1487    if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1488        // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
1489        // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1490        ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1491        return false;
1492    }
1493
1494    // No error if the chunk is well formed or it is not but we have no error.
1495    return context()->wellFormed || !xmlCtxtGetLastError(context());
1496}
1497
1498// --------------------------------
1499
1500struct AttributeParseState {
1501    HashMap<String, String> attributes;
1502    bool gotAttributes;
1503};
1504
1505static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1506                                            const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
1507                                            int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
1508{
1509    if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1510        return;
1511
1512    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1513    AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1514
1515    state->gotAttributes = true;
1516
1517    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1518    for (int i = 0; i < nb_attributes; i++) {
1519        String attrLocalName = toString(attributes[i].localname);
1520        int valueLength = (int) (attributes[i].end - attributes[i].value);
1521        String attrValue = toString(attributes[i].value, valueLength);
1522        String attrPrefix = toString(attributes[i].prefix);
1523        String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1524
1525        state->attributes.set(attrQName, attrValue);
1526    }
1527}
1528
1529HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1530{
1531    AttributeParseState state;
1532    state.gotAttributes = false;
1533
1534    xmlSAXHandler sax;
1535    memset(&sax, 0, sizeof(sax));
1536    sax.startElementNs = attributesStartElementNsHandler;
1537    sax.initialized = XML_SAX2_MAGIC;
1538    RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1539    String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1540    xmlParseChunk(parser->context(), reinterpret_cast<const char*>(parseString.characters()), parseString.length() * sizeof(UChar), 1);
1541    attrsOK = state.gotAttributes;
1542    return state.attributes;
1543}
1544
1545}
1546