1/*
2 * Copyright (C) 2000 Peter Kelly <pmk@post.com>
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov <ap@webkit.org>
5 * Copyright (C) 2007 Samuel Weinig <sam@webkit.org>
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
9 * Copyright (C) 2010 Patrick Gansterer <paroga@paroga.com>
10 * Copyright (C) 2013 Samsung Electronics. All rights reserved.
11 *
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Library General Public
14 * License as published by the Free Software Foundation; either
15 * version 2 of the License, or (at your option) any later version.
16 *
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20 * Library General Public License for more details.
21 *
22 * You should have received a copy of the GNU Library General Public License
23 * along with this library; see the file COPYING.LIB.  If not, write to
24 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 * Boston, MA 02110-1301, USA.
26 */
27
28#include "config.h"
29#include "XMLDocumentParser.h"
30
31#include "CDATASection.h"
32#include "CachedScript.h"
33#include "Comment.h"
34#include "CachedResourceLoader.h"
35#include "Document.h"
36#include "DocumentFragment.h"
37#include "DocumentType.h"
38#include "ExceptionCodePlaceholder.h"
39#include "Frame.h"
40#include "FrameLoader.h"
41#include "FrameView.h"
42#include "HTMLEntityParser.h"
43#include "HTMLHtmlElement.h"
44#include "HTMLLinkElement.h"
45#include "HTMLNames.h"
46#include "HTMLStyleElement.h"
47#include "HTMLTemplateElement.h"
48#include "Page.h"
49#include "ProcessingInstruction.h"
50#include "ResourceError.h"
51#include "ResourceRequest.h"
52#include "ResourceResponse.h"
53#include "ScriptElement.h"
54#include "ScriptSourceCode.h"
55#include "SecurityOrigin.h"
56#include "Settings.h"
57#include "TextResourceDecoder.h"
58#include "TransformSource.h"
59#include "XMLNSNames.h"
60#include "XMLDocumentParserScope.h"
61#include <libxml/parserInternals.h>
62#include <wtf/Ref.h>
63#include <wtf/StringExtras.h>
64#include <wtf/Threading.h>
65#include <wtf/Vector.h>
66#include <wtf/unicode/UTF8.h>
67
68#if ENABLE(XSLT)
69#include "XMLTreeViewer.h"
70#include <libxslt/xslt.h>
71#endif
72
73namespace WebCore {
74
75static inline bool hasNoStyleInformation(Document* document)
76{
77    if (document->sawElementsInKnownNamespaces())
78        return false;
79#if ENABLE(XSLT)
80    if (document->transformSourceDocument())
81        return false;
82#endif
83    if (!document->frame() || !document->frame()->page())
84        return false;
85
86    if (!document->frame()->page()->settings().developerExtrasEnabled())
87        return false;
88
89    if (document->frame()->tree().parent())
90        return false; // This document is not in a top frame
91
92    return true;
93}
94
95class PendingCallbacks {
96    WTF_MAKE_NONCOPYABLE(PendingCallbacks); WTF_MAKE_FAST_ALLOCATED;
97public:
98    PendingCallbacks() { }
99    ~PendingCallbacks() { }
100
101    void appendStartElementNSCallback(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
102                                      const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** attributes)
103    {
104        auto callback = std::make_unique<PendingStartElementNSCallback>();
105
106        callback->xmlLocalName = xmlStrdup(xmlLocalName);
107        callback->xmlPrefix = xmlStrdup(xmlPrefix);
108        callback->xmlURI = xmlStrdup(xmlURI);
109        callback->nb_namespaces = nb_namespaces;
110        callback->namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_namespaces * 2));
111        for (int i = 0; i < nb_namespaces * 2 ; i++)
112            callback->namespaces[i] = xmlStrdup(namespaces[i]);
113        callback->nb_attributes = nb_attributes;
114        callback->nb_defaulted = nb_defaulted;
115        callback->attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * nb_attributes * 5));
116        for (int i = 0; i < nb_attributes; i++) {
117            // Each attribute has 5 elements in the array:
118            // name, prefix, uri, value and an end pointer.
119
120            for (int j = 0; j < 3; j++)
121                callback->attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
122
123            int len = attributes[i * 5 + 4] - attributes[i * 5 + 3];
124
125            callback->attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], len);
126            callback->attributes[i * 5 + 4] = callback->attributes[i * 5 + 3] + len;
127        }
128
129        m_callbacks.append(WTF::move(callback));
130    }
131
132    void appendEndElementNSCallback()
133    {
134        m_callbacks.append(std::make_unique<PendingEndElementNSCallback>());
135    }
136
137    void appendCharactersCallback(const xmlChar* s, int len)
138    {
139        auto callback = std::make_unique<PendingCharactersCallback>();
140
141        callback->s = xmlStrndup(s, len);
142        callback->len = len;
143
144        m_callbacks.append(WTF::move(callback));
145    }
146
147    void appendProcessingInstructionCallback(const xmlChar* target, const xmlChar* data)
148    {
149        auto callback = std::make_unique<PendingProcessingInstructionCallback>();
150
151        callback->target = xmlStrdup(target);
152        callback->data = xmlStrdup(data);
153
154        m_callbacks.append(WTF::move(callback));
155    }
156
157    void appendCDATABlockCallback(const xmlChar* s, int len)
158    {
159        auto callback = std::make_unique<PendingCDATABlockCallback>();
160
161        callback->s = xmlStrndup(s, len);
162        callback->len = len;
163
164        m_callbacks.append(WTF::move(callback));
165    }
166
167    void appendCommentCallback(const xmlChar* s)
168    {
169        auto callback = std::make_unique<PendingCommentCallback>();
170
171        callback->s = xmlStrdup(s);
172
173        m_callbacks.append(WTF::move(callback));
174    }
175
176    void appendInternalSubsetCallback(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
177    {
178        auto callback = std::make_unique<PendingInternalSubsetCallback>();
179
180        callback->name = xmlStrdup(name);
181        callback->externalID = xmlStrdup(externalID);
182        callback->systemID = xmlStrdup(systemID);
183
184        m_callbacks.append(WTF::move(callback));
185    }
186
187    void appendErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
188    {
189        auto callback = std::make_unique<PendingErrorCallback>();
190
191        callback->message = xmlStrdup(message);
192        callback->type = type;
193        callback->lineNumber = lineNumber;
194        callback->columnNumber = columnNumber;
195
196        m_callbacks.append(WTF::move(callback));
197    }
198
199    void callAndRemoveFirstCallback(XMLDocumentParser* parser)
200    {
201        std::unique_ptr<PendingCallback> callback = m_callbacks.takeFirst();
202        callback->call(parser);
203    }
204
205    bool isEmpty() const { return m_callbacks.isEmpty(); }
206
207private:
208    struct PendingCallback {
209        virtual ~PendingCallback() { }
210        virtual void call(XMLDocumentParser* parser) = 0;
211    };
212
213    struct PendingStartElementNSCallback : public PendingCallback {
214        virtual ~PendingStartElementNSCallback()
215        {
216            xmlFree(xmlLocalName);
217            xmlFree(xmlPrefix);
218            xmlFree(xmlURI);
219            for (int i = 0; i < nb_namespaces * 2; i++)
220                xmlFree(namespaces[i]);
221            xmlFree(namespaces);
222            for (int i = 0; i < nb_attributes; i++)
223                for (int j = 0; j < 4; j++)
224                    xmlFree(attributes[i * 5 + j]);
225            xmlFree(attributes);
226        }
227
228        virtual void call(XMLDocumentParser* parser)
229        {
230            parser->startElementNs(xmlLocalName, xmlPrefix, xmlURI,
231                                      nb_namespaces, const_cast<const xmlChar**>(namespaces),
232                                      nb_attributes, nb_defaulted, const_cast<const xmlChar**>(attributes));
233        }
234
235        xmlChar* xmlLocalName;
236        xmlChar* xmlPrefix;
237        xmlChar* xmlURI;
238        int nb_namespaces;
239        xmlChar** namespaces;
240        int nb_attributes;
241        int nb_defaulted;
242        xmlChar** attributes;
243    };
244
245    struct PendingEndElementNSCallback : public PendingCallback {
246        virtual void call(XMLDocumentParser* parser)
247        {
248            parser->endElementNs();
249        }
250    };
251
252    struct PendingCharactersCallback : public PendingCallback {
253        virtual ~PendingCharactersCallback()
254        {
255            xmlFree(s);
256        }
257
258        virtual void call(XMLDocumentParser* parser)
259        {
260            parser->characters(s, len);
261        }
262
263        xmlChar* s;
264        int len;
265    };
266
267    struct PendingProcessingInstructionCallback : public PendingCallback {
268        virtual ~PendingProcessingInstructionCallback()
269        {
270            xmlFree(target);
271            xmlFree(data);
272        }
273
274        virtual void call(XMLDocumentParser* parser)
275        {
276            parser->processingInstruction(target, data);
277        }
278
279        xmlChar* target;
280        xmlChar* data;
281    };
282
283    struct PendingCDATABlockCallback : public PendingCallback {
284        virtual ~PendingCDATABlockCallback()
285        {
286            xmlFree(s);
287        }
288
289        virtual void call(XMLDocumentParser* parser)
290        {
291            parser->cdataBlock(s, len);
292        }
293
294        xmlChar* s;
295        int len;
296    };
297
298    struct PendingCommentCallback : public PendingCallback {
299        virtual ~PendingCommentCallback()
300        {
301            xmlFree(s);
302        }
303
304        virtual void call(XMLDocumentParser* parser)
305        {
306            parser->comment(s);
307        }
308
309        xmlChar* s;
310    };
311
312    struct PendingInternalSubsetCallback : public PendingCallback {
313        virtual ~PendingInternalSubsetCallback()
314        {
315            xmlFree(name);
316            xmlFree(externalID);
317            xmlFree(systemID);
318        }
319
320        virtual void call(XMLDocumentParser* parser)
321        {
322            parser->internalSubset(name, externalID, systemID);
323        }
324
325        xmlChar* name;
326        xmlChar* externalID;
327        xmlChar* systemID;
328    };
329
330    struct PendingErrorCallback: public PendingCallback {
331        virtual ~PendingErrorCallback()
332        {
333            xmlFree(message);
334        }
335
336        virtual void call(XMLDocumentParser* parser)
337        {
338            parser->handleError(type, reinterpret_cast<char*>(message), TextPosition(lineNumber, columnNumber));
339        }
340
341        XMLErrors::ErrorType type;
342        xmlChar* message;
343        OrdinalNumber lineNumber;
344        OrdinalNumber columnNumber;
345    };
346
347    Deque<std::unique_ptr<PendingCallback>> m_callbacks;
348};
349// --------------------------------
350
351static int globalDescriptor = 0;
352static ThreadIdentifier libxmlLoaderThread = 0;
353
354static int matchFunc(const char*)
355{
356    // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
357    // interfering with client applications that also use libxml2.  http://bugs.webkit.org/show_bug.cgi?id=17353
358    return XMLDocumentParserScope::currentCachedResourceLoader && currentThread() == libxmlLoaderThread;
359}
360
361class OffsetBuffer {
362    WTF_MAKE_FAST_ALLOCATED;
363public:
364    OffsetBuffer(Vector<char> buffer)
365        : m_buffer(WTF::move(buffer))
366        , m_currentOffset(0)
367    {
368    }
369
370    int readOutBytes(char* outputBuffer, unsigned askedToRead)
371    {
372        unsigned bytesLeft = m_buffer.size() - m_currentOffset;
373        unsigned lenToCopy = std::min(askedToRead, bytesLeft);
374        if (lenToCopy) {
375            memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
376            m_currentOffset += lenToCopy;
377        }
378        return lenToCopy;
379    }
380
381private:
382    Vector<char> m_buffer;
383    unsigned m_currentOffset;
384};
385
386static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
387{
388    if (!scriptingContentIsAllowed(parserContentPolicy))
389        element->stripScriptingAttributes(attributeVector);
390    element->parserSetAttributes(attributeVector);
391}
392
393static void switchToUTF16(xmlParserCtxtPtr ctxt)
394{
395    // Hack around libxml2's lack of encoding overide support by manually
396    // resetting the encoding to UTF-16 before every chunk.  Otherwise libxml
397    // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
398    // and switch encodings, causing the parse to fail.
399
400    // FIXME: Can we just use XML_PARSE_IGNORE_ENC now?
401
402    const UChar BOM = 0xFEFF;
403    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
404    xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
405}
406
407static bool shouldAllowExternalLoad(const URL& url)
408{
409    String urlString = url.string();
410
411    // On non-Windows platforms libxml asks for this URL, the
412    // "XML_XML_DEFAULT_CATALOG", on initialization.
413    if (urlString == "file:///etc/xml/catalog")
414        return false;
415
416    // On Windows, libxml computes a URL relative to where its DLL resides.
417    if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
418        return false;
419
420    // The most common DTD.  There isn't much point in hammering www.w3c.org
421    // by requesting this URL for every XHTML document.
422    if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
423        return false;
424
425    // Similarly, there isn't much point in requesting the SVG DTD.
426    if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
427        return false;
428
429    // The libxml doesn't give us a lot of context for deciding whether to
430    // allow this request.  In the worst case, this load could be for an
431    // external entity and the resulting document could simply read the
432    // retrieved content.  If we had more context, we could potentially allow
433    // the parser to load a DTD.  As things stand, we take the conservative
434    // route and allow same-origin requests only.
435    if (!XMLDocumentParserScope::currentCachedResourceLoader->document()->securityOrigin()->canRequest(url)) {
436        XMLDocumentParserScope::currentCachedResourceLoader->printAccessDeniedMessage(url);
437        return false;
438    }
439
440    return true;
441}
442
443static void* openFunc(const char* uri)
444{
445    ASSERT(XMLDocumentParserScope::currentCachedResourceLoader);
446    ASSERT(currentThread() == libxmlLoaderThread);
447
448    URL url(URL(), uri);
449
450    if (!shouldAllowExternalLoad(url))
451        return &globalDescriptor;
452
453    ResourceError error;
454    ResourceResponse response;
455    Vector<char> data;
456
457
458    {
459        CachedResourceLoader* cachedResourceLoader = XMLDocumentParserScope::currentCachedResourceLoader;
460        XMLDocumentParserScope scope(0);
461        // FIXME: We should restore the original global error handler as well.
462
463        if (cachedResourceLoader->frame())
464            cachedResourceLoader->frame()->loader().loadResourceSynchronously(url, AllowStoredCredentials, DoNotAskClientForCrossOriginCredentials, error, response, data);
465    }
466
467    // We have to check the URL again after the load to catch redirects.
468    // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
469    if (!shouldAllowExternalLoad(response.url()))
470        return &globalDescriptor;
471
472    return new OffsetBuffer(WTF::move(data));
473}
474
475static int readFunc(void* context, char* buffer, int len)
476{
477    // Do 0-byte reads in case of a null descriptor
478    if (context == &globalDescriptor)
479        return 0;
480
481    OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
482    return data->readOutBytes(buffer, len);
483}
484
// libxml "write" callback: ignores all of its arguments and reports that
// zero bytes were written.
static int writeFunc(void*, const char*, int)
{
    // Always just do 0-byte writes
    const int bytesWritten = 0;
    return bytesWritten;
}
490
491static int closeFunc(void* context)
492{
493    if (context != &globalDescriptor) {
494        OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
495        delete data;
496    }
497    return 0;
498}
499
500#if ENABLE(XSLT)
501static void errorFunc(void*, const char*, ...)
502{
503    // FIXME: It would be nice to display error messages somewhere.
504}
505#endif
506
507static bool didInit = false;
508
509PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
510{
511    if (!didInit) {
512        xmlInitParser();
513        xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
514        xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
515        libxmlLoaderThread = currentThread();
516        didInit = true;
517    }
518
519    xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
520    parser->_private = userData;
521
522    // Substitute entities.
523    xmlCtxtUseOptions(parser, XML_PARSE_NOENT);
524
525    switchToUTF16(parser);
526
527    return adoptRef(new XMLParserContext(parser));
528}
529
530
531// Chunk should be encoded in UTF-8
532PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
533{
534    if (!didInit) {
535        xmlInitParser();
536        xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
537        xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
538        libxmlLoaderThread = currentThread();
539        didInit = true;
540    }
541
542    // appendFragmentSource() checks that the length doesn't overflow an int.
543    xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
544
545    if (!parser)
546        return 0;
547
548    memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
549
550    // Substitute entities.
551    // FIXME: Why is XML_PARSE_NODICT needed? This is different from what createStringParser does.
552    xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
553
554    // Internal initialization
555    parser->sax2 = 1;
556    parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
557    parser->depth = 0;
558    parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
559    parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
560    parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
561    parser->_private = userData;
562
563    return adoptRef(new XMLParserContext(parser));
564}
565
566// --------------------------------
567
568bool XMLDocumentParser::supportsXMLVersion(const String& version)
569{
570    return version == "1.0";
571}
572
573XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
574    : ScriptableDocumentParser(document)
575    , m_view(frameView)
576    , m_context(0)
577    , m_pendingCallbacks(std::make_unique<PendingCallbacks>())
578    , m_depthTriggeringEntityExpansion(-1)
579    , m_isParsingEntityDeclaration(false)
580    , m_currentNode(&document)
581    , m_sawError(false)
582    , m_sawCSS(false)
583    , m_sawXSLTransform(false)
584    , m_sawFirstElement(false)
585    , m_isXHTMLDocument(false)
586    , m_parserPaused(false)
587    , m_requestingScript(false)
588    , m_finishCalled(false)
589    , m_pendingScript(0)
590    , m_scriptStartPosition(TextPosition::belowRangePosition())
591    , m_parsingFragment(false)
592{
593}
594
595XMLDocumentParser::XMLDocumentParser(DocumentFragment& fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
596    : ScriptableDocumentParser(fragment.document(), parserContentPolicy)
597    , m_view(0)
598    , m_context(0)
599    , m_pendingCallbacks(std::make_unique<PendingCallbacks>())
600    , m_depthTriggeringEntityExpansion(-1)
601    , m_isParsingEntityDeclaration(false)
602    , m_currentNode(&fragment)
603    , m_sawError(false)
604    , m_sawCSS(false)
605    , m_sawXSLTransform(false)
606    , m_sawFirstElement(false)
607    , m_isXHTMLDocument(false)
608    , m_parserPaused(false)
609    , m_requestingScript(false)
610    , m_finishCalled(false)
611    , m_pendingScript(0)
612    , m_scriptStartPosition(TextPosition::belowRangePosition())
613    , m_parsingFragment(true)
614{
615    fragment.ref();
616
617    // Add namespaces based on the parent node
618    Vector<Element*> elemStack;
619    while (parentElement) {
620        elemStack.append(parentElement);
621
622        ContainerNode* n = parentElement->parentNode();
623        if (!n || !n->isElementNode())
624            break;
625        parentElement = toElement(n);
626    }
627
628    if (elemStack.isEmpty())
629        return;
630
631    // FIXME: Share code with isDefaultNamespace() per http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#parsing-xhtml-fragments
632    for (; !elemStack.isEmpty(); elemStack.removeLast()) {
633        Element* element = elemStack.last();
634        if (element->hasAttributes()) {
635            for (const Attribute& attribute : element->attributesIterator()) {
636                if (attribute.localName() == xmlnsAtom)
637                    m_defaultNamespaceURI = attribute.value();
638                else if (attribute.prefix() == xmlnsAtom)
639                    m_prefixToNamespaceMap.set(attribute.localName(), attribute.value());
640            }
641        }
642    }
643
644    if (m_defaultNamespaceURI.isNull())
645        m_defaultNamespaceURI = parentElement->namespaceURI();
646}
647
648XMLParserContext::~XMLParserContext()
649{
650    if (m_context->myDoc)
651        xmlFreeDoc(m_context->myDoc);
652    xmlFreeParserCtxt(m_context);
653}
654
655XMLDocumentParser::~XMLDocumentParser()
656{
657    // The XMLDocumentParser will always be detached before being destroyed.
658    ASSERT(m_currentNodeStack.isEmpty());
659    ASSERT(!m_currentNode);
660
661    // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
662    if (m_pendingScript)
663        m_pendingScript->removeClient(this);
664}
665
666void XMLDocumentParser::doWrite(const String& parseString)
667{
668    ASSERT(!isDetached());
669    if (!m_context)
670        initializeParserContext();
671
672    // Protect the libxml context from deletion during a callback
673    RefPtr<XMLParserContext> context = m_context;
674
675    // libXML throws an error if you try to switch the encoding for an empty string.
676    if (parseString.length()) {
677        // JavaScript may cause the parser to detach during xmlParseChunk
678        // keep this alive until this function is done.
679        Ref<XMLDocumentParser> protect(*this);
680
681        XMLDocumentParserScope scope(document()->cachedResourceLoader());
682
683        // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16?
684        switchToUTF16(context->context());
685        xmlParseChunk(context->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), sizeof(UChar) * parseString.length(), 0);
686
687        // JavaScript (which may be run under the xmlParseChunk callstack) may
688        // cause the parser to be stopped or detached.
689        if (isStopped())
690            return;
691    }
692
693    // FIXME: Why is this here?  And why is it after we process the passed source?
694    if (document()->decoder() && document()->decoder()->sawError()) {
695        // If the decoder saw an error, report it as fatal (stops parsing)
696        TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
697        handleError(XMLErrors::fatal, "Encoding error", position);
698    }
699}
700
701static inline String toString(const xmlChar* string, size_t size)
702{
703    return String::fromUTF8(reinterpret_cast<const char*>(string), size);
704}
705
706static inline String toString(const xmlChar* string)
707{
708    return String::fromUTF8(reinterpret_cast<const char*>(string));
709}
710
711static inline AtomicString toAtomicString(const xmlChar* string, size_t size)
712{
713    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), size);
714}
715
716static inline AtomicString toAtomicString(const xmlChar* string)
717{
718    return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
719}
720
721struct _xmlSAX2Namespace {
722    const xmlChar* prefix;
723    const xmlChar* uri;
724};
725typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
726
727static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nb_namespaces, ExceptionCode& ec)
728{
729    xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
730    for (int i = 0; i < nb_namespaces; i++) {
731        AtomicString namespaceQName = xmlnsAtom;
732        AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
733        if (namespaces[i].prefix)
734            namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
735
736        QualifiedName parsedName = anyName;
737        if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, ec))
738            return;
739
740        prefixedAttributes.append(Attribute(parsedName, namespaceURI));
741    }
742}
743
744struct _xmlSAX2Attributes {
745    const xmlChar* localname;
746    const xmlChar* prefix;
747    const xmlChar* uri;
748    const xmlChar* value;
749    const xmlChar* end;
750};
751typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
752
753static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nb_attributes, ExceptionCode& ec)
754{
755    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
756    for (int i = 0; i < nb_attributes; i++) {
757        int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
758        AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
759        String attrPrefix = toString(attributes[i].prefix);
760        AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
761        AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
762
763        QualifiedName parsedName = anyName;
764        if (!Element::parseAttributeName(parsedName, attrURI, attrQName, ec))
765            return;
766
767        prefixedAttributes.append(Attribute(parsedName, attrValue));
768    }
769}
770
// This is a hack around https://bugzilla.gnome.org/show_bug.cgi?id=502960
// Otherwise libxml doesn't include namespace for parsed entities, breaking entity
// expansion for all entities containing elements.
//
// Returns true when the work-around is needed, i.e. when building against a
// libxml older than 2.7.4 (the release that fixed the bug).
static inline bool hackAroundLibXMLEntityParsingBug()
{
#if LIBXML_VERSION < 20704
    return true;
#else
    // This bug has been fixed in libxml 2.7.4.
    return false;
#endif
}
783
784void XMLDocumentParser::startElementNs(const xmlChar* xmlLocalName, const xmlChar* xmlPrefix, const xmlChar* xmlURI, int nb_namespaces,
785                                  const xmlChar** libxmlNamespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
786{
787    if (isStopped())
788        return;
789
790    if (m_parserPaused) {
791        m_pendingCallbacks->appendStartElementNSCallback(xmlLocalName, xmlPrefix, xmlURI, nb_namespaces, libxmlNamespaces,
792                                                         nb_attributes, nb_defaulted, libxmlAttributes);
793        return;
794    }
795
796    exitText();
797
798    AtomicString localName = toAtomicString(xmlLocalName);
799    AtomicString uri = toAtomicString(xmlURI);
800    AtomicString prefix = toAtomicString(xmlPrefix);
801
802    if (m_parsingFragment && uri.isNull()) {
803        if (!prefix.isNull())
804            uri = m_prefixToNamespaceMap.get(prefix);
805        else
806            uri = m_defaultNamespaceURI;
807    }
808
809    // If libxml entity parsing is broken, transfer the currentNodes' namespaceURI to the new node,
810    // if we're currently expanding elements which originate from an entity declaration.
811    if (hackAroundLibXMLEntityParsingBug() && depthTriggeringEntityExpansion() != -1 && context()->depth > depthTriggeringEntityExpansion() && uri.isNull() && prefix.isNull())
812        uri = m_currentNode->namespaceURI();
813
814    bool isFirstElement = !m_sawFirstElement;
815    m_sawFirstElement = true;
816
817    QualifiedName qName(prefix, localName, uri);
818    RefPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
819    if (!newElement) {
820        stopParsing();
821        return;
822    }
823
824    Vector<Attribute> prefixedAttributes;
825    ExceptionCode ec = 0;
826    handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nb_namespaces, ec);
827    if (ec) {
828        setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
829        stopParsing();
830        return;
831    }
832
833    handleElementAttributes(prefixedAttributes, libxmlAttributes, nb_attributes, ec);
834    setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
835    if (ec) {
836        stopParsing();
837        return;
838    }
839
840    newElement->beginParsingChildren();
841
842    ScriptElement* scriptElement = toScriptElementIfPossible(newElement.get());
843    if (scriptElement)
844        m_scriptStartPosition = textPosition();
845
846    m_currentNode->parserAppendChild(newElement.get());
847    if (!m_currentNode) // Synchronous DOM events may have removed the current node.
848        return;
849
850#if ENABLE(TEMPLATE_ELEMENT)
851    if (newElement->hasTagName(HTMLNames::templateTag))
852        pushCurrentNode(toHTMLTemplateElement(newElement.get())->content());
853    else
854        pushCurrentNode(newElement.get());
855#else
856    pushCurrentNode(newElement.get());
857#endif
858
859    if (newElement->hasTagName(HTMLNames::htmlTag))
860        toHTMLHtmlElement(newElement.get())->insertedByParser();
861
862    if (!m_parsingFragment && isFirstElement && document()->frame())
863        document()->frame()->injectUserScripts(InjectAtDocumentStart);
864}
865
// Handles the end of an element: finishes the current node, pops it from the node stack,
// and, when the node is a script element that is allowed to run, prepares and executes it
// (or pauses the parser until the external script has loaded).
void XMLDocumentParser::endElementNs()
{
    if (isStopped())
        return;

    // While paused, queue the event; resumeParsing() drains m_pendingCallbacks later.
    if (m_parserPaused) {
        m_pendingCallbacks->appendEndElementNSCallback();
        return;
    }

    // JavaScript can detach the parser.  Make sure this is not released
    // before the end of this method.
    Ref<XMLDocumentParser> protect(*this);

    exitText();

    // Hold a reference to the node being closed; m_currentNode changes once popCurrentNode() runs.
    RefPtr<ContainerNode> n = m_currentNode;
    n->finishParsingChildren();

    // Once we reach the depth again where entity expansion started, stop executing the work-around.
    if (hackAroundLibXMLEntityParsingBug() && context()->depth <= depthTriggeringEntityExpansion())
        setDepthTriggeringEntityExpansion(-1);

    // When the content policy forbids scripting, script elements are dropped from the tree entirely.
    if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptElementIfPossible(toElement(n.get()))) {
        popCurrentNode();
        n->remove(IGNORE_EXCEPTION);
        return;
    }

    // Non-element nodes, or parsers with no m_view, have nothing further to do here.
    if (!n->isElementNode() || !m_view) {
        popCurrentNode();
        return;
    }

    Element* element = toElement(n.get());

    // The element's parent may have already been removed from document.
    // Parsing continues in this case, but scripts aren't executed.
    if (!element->inDocument()) {
        popCurrentNode();
        return;
    }

    ScriptElement* scriptElement = toScriptElementIfPossible(element);
    if (!scriptElement) {
        popCurrentNode();
        return;
    }

    // Don't load external scripts for standalone documents (for now).
    ASSERT(!m_pendingScript);
    m_requestingScript = true;

    if (scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute)) {
        // FIXME: Script execution should be shared between
        // the libxml2 and Qt XMLDocumentParser implementations.

        if (scriptElement->readyToBeParserExecuted())
            scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition));
        else if (scriptElement->willBeParserExecuted()) {
            // Register as a client of the cached script and remember which element it belongs to.
            m_pendingScript = scriptElement->cachedScript();
            m_scriptElement = element;
            m_pendingScript->addClient(this);

            // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
            if (m_pendingScript)
                pauseParsing();
        } else
            m_scriptElement = 0;

        // JavaScript may have detached the parser
        // NOTE(review): this early return leaves m_requestingScript set to true and skips
        // popCurrentNode(); presumably harmless once detached -- confirm.
        if (isDetached())
            return;
    }
    m_requestingScript = false;
    popCurrentNode();
}
943
944void XMLDocumentParser::characters(const xmlChar* s, int len)
945{
946    if (isStopped())
947        return;
948
949    if (m_parserPaused) {
950        m_pendingCallbacks->appendCharactersCallback(s, len);
951        return;
952    }
953
954    if (!m_leafTextNode)
955        enterText();
956    m_bufferedText.append(s, len);
957}
958
959void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
960{
961    if (isStopped())
962        return;
963
964#if HAVE(VASPRINTF)
965    char* m;
966    if (vasprintf(&m, message, args) == -1)
967        return;
968#else
969    char m[1024];
970    vsnprintf(m, sizeof(m) - 1, message, args);
971#endif
972
973    TextPosition position = textPosition();
974    if (m_parserPaused)
975        m_pendingCallbacks->appendErrorCallback(type, reinterpret_cast<const xmlChar*>(m), position.m_line, position.m_column);
976    else
977        handleError(type, m, textPosition());
978
979#if HAVE(VASPRINTF)
980    free(m);
981#endif
982}
983
984void XMLDocumentParser::processingInstruction(const xmlChar* target, const xmlChar* data)
985{
986    if (isStopped())
987        return;
988
989    if (m_parserPaused) {
990        m_pendingCallbacks->appendProcessingInstructionCallback(target, data);
991        return;
992    }
993
994    exitText();
995
996    // ### handle exceptions
997    ExceptionCode ec = 0;
998    RefPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(
999        toString(target), toString(data), ec);
1000    if (ec)
1001        return;
1002
1003    pi->setCreatedByParser(true);
1004
1005    m_currentNode->parserAppendChild(pi.get());
1006
1007    pi->finishParsingChildren();
1008
1009    if (pi->isCSS())
1010        m_sawCSS = true;
1011#if ENABLE(XSLT)
1012    m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1013    if (m_sawXSLTransform && !document()->transformSourceDocument())
1014        stopParsing();
1015#endif
1016}
1017
1018void XMLDocumentParser::cdataBlock(const xmlChar* s, int len)
1019{
1020    if (isStopped())
1021        return;
1022
1023    if (m_parserPaused) {
1024        m_pendingCallbacks->appendCDATABlockCallback(s, len);
1025        return;
1026    }
1027
1028    exitText();
1029
1030    RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), toString(s, len));
1031    m_currentNode->parserAppendChild(newNode.release());
1032}
1033
1034void XMLDocumentParser::comment(const xmlChar* s)
1035{
1036    if (isStopped())
1037        return;
1038
1039    if (m_parserPaused) {
1040        m_pendingCallbacks->appendCommentCallback(s);
1041        return;
1042    }
1043
1044    exitText();
1045
1046    RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), toString(s));
1047    m_currentNode->parserAppendChild(newNode.release());
1048}
1049
// Interpretation of the integer "standalone" value handed to startDocument().
enum StandaloneInfo {
    StandaloneUnspecified = -2, // setXMLStandalone() is not called for this value.
    NoXMlDeclaration = -1, // The document is marked as having no XML declaration.
    StandaloneNo = 0,
    StandaloneYes = 1
};
1056
1057void XMLDocumentParser::startDocument(const xmlChar* version, const xmlChar* encoding, int standalone)
1058{
1059    StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
1060    if (standaloneInfo == NoXMlDeclaration) {
1061        document()->setHasXMLDeclaration(false);
1062        return;
1063    }
1064
1065    if (version)
1066        document()->setXMLVersion(toString(version), ASSERT_NO_EXCEPTION);
1067    if (standalone != StandaloneUnspecified)
1068        document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1069    if (encoding)
1070        document()->setXMLEncoding(toString(encoding));
1071    document()->setHasXMLDeclaration(true);
1072}
1073
// Called at the end of the document (and by appendFragmentSource() after parsing a
// fragment) to close any open text node via exitText().
void XMLDocumentParser::endDocument()
{
    exitText();
}
1078
1079void XMLDocumentParser::internalSubset(const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1080{
1081    if (isStopped())
1082        return;
1083
1084    if (m_parserPaused) {
1085        m_pendingCallbacks->appendInternalSubsetCallback(name, externalID, systemID);
1086        return;
1087    }
1088
1089    if (document())
1090        document()->parserAppendChild(DocumentType::create(*document(), toString(name), toString(externalID), toString(systemID)));
1091}
1092
1093static inline XMLDocumentParser* getParser(void* closure)
1094{
1095    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1096    return static_cast<XMLDocumentParser*>(ctxt->_private);
1097}
1098
1099// This is a hack around http://bugzilla.gnome.org/show_bug.cgi?id=159219
1100// Otherwise libxml seems to call all the SAX callbacks twice for any replaced entity.
1101static inline bool hackAroundLibXMLEntityBug(void* closure)
1102{
1103#if LIBXML_VERSION >= 20627
1104    UNUSED_PARAM(closure);
1105
1106    // This bug has been fixed in libxml 2.6.27.
1107    return false;
1108#else
1109    return static_cast<xmlParserCtxtPtr>(closure)->node;
1110#endif
1111}
1112
1113static void startElementNsHandler(void* closure, const xmlChar* localname, const xmlChar* prefix, const xmlChar* uri, int nb_namespaces, const xmlChar** namespaces, int nb_attributes, int nb_defaulted, const xmlChar** libxmlAttributes)
1114{
1115    if (hackAroundLibXMLEntityBug(closure))
1116        return;
1117
1118    getParser(closure)->startElementNs(localname, prefix, uri, nb_namespaces, namespaces, nb_attributes, nb_defaulted, libxmlAttributes);
1119}
1120
1121static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1122{
1123    if (hackAroundLibXMLEntityBug(closure))
1124        return;
1125
1126    getParser(closure)->endElementNs();
1127}
1128
1129static void charactersHandler(void* closure, const xmlChar* s, int len)
1130{
1131    if (hackAroundLibXMLEntityBug(closure))
1132        return;
1133
1134    getParser(closure)->characters(s, len);
1135}
1136
1137static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1138{
1139    if (hackAroundLibXMLEntityBug(closure))
1140        return;
1141
1142    getParser(closure)->processingInstruction(target, data);
1143}
1144
1145static void cdataBlockHandler(void* closure, const xmlChar* s, int len)
1146{
1147    if (hackAroundLibXMLEntityBug(closure))
1148        return;
1149
1150    getParser(closure)->cdataBlock(s, len);
1151}
1152
1153static void commentHandler(void* closure, const xmlChar* comment)
1154{
1155    if (hackAroundLibXMLEntityBug(closure))
1156        return;
1157
1158    getParser(closure)->comment(comment);
1159}
1160
1161WTF_ATTRIBUTE_PRINTF(2, 3)
1162static void warningHandler(void* closure, const char* message, ...)
1163{
1164    va_list args;
1165    va_start(args, message);
1166    getParser(closure)->error(XMLErrors::warning, message, args);
1167    va_end(args);
1168}
1169
1170WTF_ATTRIBUTE_PRINTF(2, 3)
1171static void fatalErrorHandler(void* closure, const char* message, ...)
1172{
1173    va_list args;
1174    va_start(args, message);
1175    getParser(closure)->error(XMLErrors::fatal, message, args);
1176    va_end(args);
1177}
1178
1179WTF_ATTRIBUTE_PRINTF(2, 3)
1180static void normalErrorHandler(void* closure, const char* message, ...)
1181{
1182    va_list args;
1183    va_start(args, message);
1184    getParser(closure)->error(XMLErrors::nonFatal, message, args);
1185    va_end(args);
1186}
1187
1188// Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
1189// a hack to avoid malloc/free. Using a global variable like this could cause trouble
1190// if libxml implementation details were to change
1191static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
1192
1193static xmlEntityPtr sharedXHTMLEntity()
1194{
1195    static xmlEntity entity;
1196    if (!entity.type) {
1197        entity.type = XML_ENTITY_DECL;
1198        entity.orig = sharedXHTMLEntityResult;
1199        entity.content = sharedXHTMLEntityResult;
1200        entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
1201    }
1202    return &entity;
1203}
1204
1205static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
1206{
1207    const char* originalTarget = target;
1208    WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1209        utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1210    if (conversionResult != WTF::Unicode::conversionOK)
1211        return 0;
1212
1213    // Even though we must pass the length, libxml expects the entity string to be null terminated.
1214    ASSERT(target > originalTarget + 1);
1215    *target = '\0';
1216    return target - originalTarget;
1217}
1218
1219static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1220{
1221    UChar utf16DecodedEntity[4];
1222    size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1223    if (!numberOfCodeUnits)
1224        return 0;
1225
1226    ASSERT(numberOfCodeUnits <= 4);
1227    size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1228        reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1229    if (!entityLengthInUTF8)
1230        return 0;
1231
1232    xmlEntityPtr entity = sharedXHTMLEntity();
1233    entity->length = entityLengthInUTF8;
1234    entity->name = name;
1235    return entity;
1236}
1237
1238static void entityDeclarationHandler(void* closure, const xmlChar* name, int type, const xmlChar* publicId, const xmlChar* systemId, xmlChar* content)
1239{
1240    // Prevent the next call to getEntityHandler() to record the entity expansion depth.
1241    // We're parsing the entity declaration, so there's no need to record anything.
1242    // We only need to record the depth, if we're actually expanding the entity, when it's referenced.
1243    if (hackAroundLibXMLEntityParsingBug())
1244        getParser(closure)->setIsParsingEntityDeclaration(true);
1245    xmlSAX2EntityDecl(closure, name, type, publicId, systemId, content);
1246}
1247
1248static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1249{
1250    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1251
1252    XMLDocumentParser* parser = getParser(closure);
1253    if (hackAroundLibXMLEntityParsingBug()) {
1254        if (parser->isParsingEntityDeclaration()) {
1255            // We're parsing the entity declarations (not an entity reference), no need to do anything special.
1256            parser->setIsParsingEntityDeclaration(false);
1257            ASSERT(parser->depthTriggeringEntityExpansion() == -1);
1258        } else {
1259            // The entity will be used and eventually expanded. Record the current parser depth
1260            // so the next call to startElementNs() knows that the new element originates from
1261            // an entity declaration.
1262            parser->setDepthTriggeringEntityExpansion(ctxt->depth);
1263        }
1264    }
1265
1266    xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1267    if (ent) {
1268        ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1269        return ent;
1270    }
1271
1272    ent = xmlGetDocEntity(ctxt->myDoc, name);
1273    if (!ent && parser->isXHTMLDocument()) {
1274        ent = getXHTMLEntity(name);
1275        if (ent)
1276            ent->etype = XML_INTERNAL_GENERAL_ENTITY;
1277    }
1278
1279    return ent;
1280}
1281
1282static void startDocumentHandler(void* closure)
1283{
1284    xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1285    switchToUTF16(ctxt);
1286    getParser(closure)->startDocument(ctxt->version, ctxt->encoding, ctxt->standalone);
1287    xmlSAX2StartDocument(closure);
1288}
1289
1290static void endDocumentHandler(void* closure)
1291{
1292    getParser(closure)->endDocument();
1293    xmlSAX2EndDocument(closure);
1294}
1295
1296static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1297{
1298    getParser(closure)->internalSubset(name, externalID, systemID);
1299    xmlSAX2InternalSubset(closure, name, externalID, systemID);
1300}
1301
1302static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1303{
1304    String extId = toString(externalId);
1305    if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1306        || (extId == "-//W3C//DTD XHTML 1.1//EN")
1307        || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1308        || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1309        || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1310        || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1311        || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1312        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1313        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN")
1314        || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"))
1315        getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
1316}
1317
// SAX ignorable-whitespace callback. Intentionally empty: all three arguments are unused.
static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
{
    // nothing to do, but we need this to work around a crasher
    // http://bugzilla.gnome.org/show_bug.cgi?id=172255
    // http://bugs.webkit.org/show_bug.cgi?id=5792
}
1324
1325void XMLDocumentParser::initializeParserContext(const CString& chunk)
1326{
1327    xmlSAXHandler sax;
1328    memset(&sax, 0, sizeof(sax));
1329
1330    sax.error = normalErrorHandler;
1331    sax.fatalError = fatalErrorHandler;
1332    sax.characters = charactersHandler;
1333    sax.processingInstruction = processingInstructionHandler;
1334    sax.cdataBlock = cdataBlockHandler;
1335    sax.comment = commentHandler;
1336    sax.warning = warningHandler;
1337    sax.startElementNs = startElementNsHandler;
1338    sax.endElementNs = endElementNsHandler;
1339    sax.getEntity = getEntityHandler;
1340    sax.startDocument = startDocumentHandler;
1341    sax.endDocument = endDocumentHandler;
1342    sax.internalSubset = internalSubsetHandler;
1343    sax.externalSubset = externalSubsetHandler;
1344    sax.ignorableWhitespace = ignorableWhitespaceHandler;
1345    sax.entityDecl = entityDeclarationHandler;
1346    sax.initialized = XML_SAX2_MAGIC;
1347    DocumentParser::startParsing();
1348    m_sawError = false;
1349    m_sawCSS = false;
1350    m_sawXSLTransform = false;
1351    m_sawFirstElement = false;
1352
1353    XMLDocumentParserScope scope(document()->cachedResourceLoader());
1354    if (m_parsingFragment)
1355        m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1356    else {
1357        ASSERT(!chunk.data());
1358        m_context = XMLParserContext::createStringParser(&sax, this);
1359    }
1360}
1361
1362void XMLDocumentParser::doEnd()
1363{
1364    if (!isStopped()) {
1365        if (m_context) {
1366            // Tell libxml we're done.
1367            {
1368                XMLDocumentParserScope scope(document()->cachedResourceLoader());
1369                xmlParseChunk(context(), 0, 0, 1);
1370            }
1371
1372            m_context = 0;
1373        }
1374    }
1375
1376#if ENABLE(XSLT)
1377    bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1378    if (xmlViewerMode) {
1379        XMLTreeViewer xmlTreeViewer(*document());
1380        xmlTreeViewer.transformDocumentToTreeView();
1381    } else if (m_sawXSLTransform) {
1382        void* doc = xmlDocPtrForString(document()->cachedResourceLoader(), m_originalSourceForTransform.toString(), document()->url().string());
1383        document()->setTransformSource(std::make_unique<TransformSource>(doc));
1384
1385        document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
1386        document()->styleResolverChanged(RecalcStyleImmediately);
1387
1388        // styleResolverChanged() call can detach the parser and null out its document.
1389        // In that case, we just bail out.
1390        if (isDetached())
1391            return;
1392
1393        document()->setParsing(true);
1394        DocumentParser::stopParsing();
1395    }
1396#endif
1397}
1398
1399#if ENABLE(XSLT)
1400static inline const char* nativeEndianUTF16Encoding()
1401{
1402    const UChar BOM = 0xFEFF;
1403    const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
1404    return BOMHighByte == 0xFF ? "UTF-16LE" : "UTF-16BE";
1405}
1406
1407void* xmlDocPtrForString(CachedResourceLoader* cachedResourceLoader, const String& source, const String& url)
1408{
1409    if (source.isEmpty())
1410        return 0;
1411
1412    // Parse in a single chunk into an xmlDocPtr
1413    // FIXME: Hook up error handlers so that a failure to parse the main document results in
1414    // good error messages.
1415
1416    const bool is8Bit = source.is8Bit();
1417    const char* characters = is8Bit ? reinterpret_cast<const char*>(source.characters8()) : reinterpret_cast<const char*>(source.characters16());
1418    size_t sizeInBytes = source.length() * (is8Bit ? sizeof(LChar) : sizeof(UChar));
1419    const char* encoding = is8Bit ? "iso-8859-1" : nativeEndianUTF16Encoding();
1420
1421    XMLDocumentParserScope scope(cachedResourceLoader, errorFunc, 0);
1422    return xmlReadMemory(characters, sizeInBytes, url.latin1().data(), encoding, XSLT_PARSE_OPTIONS);
1423}
1424#endif
1425
1426TextPosition XMLDocumentParser::textPosition() const
1427{
1428    xmlParserCtxtPtr context = this->context();
1429    if (!context)
1430        return TextPosition::minimumPosition();
1431    return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
1432                        OrdinalNumber::fromOneBasedInt(context->input->col));
1433}
1434
1435void XMLDocumentParser::stopParsing()
1436{
1437    DocumentParser::stopParsing();
1438    if (context())
1439        xmlStopParser(context());
1440}
1441
1442void XMLDocumentParser::resumeParsing()
1443{
1444    ASSERT(!isDetached());
1445    ASSERT(m_parserPaused);
1446
1447    m_parserPaused = false;
1448
1449    // First, execute any pending callbacks
1450    while (!m_pendingCallbacks->isEmpty()) {
1451        m_pendingCallbacks->callAndRemoveFirstCallback(this);
1452
1453        // A callback paused the parser
1454        if (m_parserPaused)
1455            return;
1456    }
1457
1458    // Then, write any pending data
1459    SegmentedString rest = m_pendingSrc;
1460    m_pendingSrc.clear();
1461    // There is normally only one string left, so toString() shouldn't copy.
1462    // In any case, the XML parser runs on the main thread and it's OK if
1463    // the passed string has more than one reference.
1464    append(rest.toString().impl());
1465
1466    // Finally, if finish() has been called and write() didn't result
1467    // in any further callbacks being queued, call end()
1468    if (m_finishCalled && m_pendingCallbacks->isEmpty())
1469        end();
1470}
1471
1472bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1473{
1474    ASSERT(!m_context);
1475    ASSERT(m_parsingFragment);
1476
1477    CString chunkAsUtf8 = chunk.utf8();
1478
1479    // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
1480    if (chunkAsUtf8.length() > INT_MAX)
1481        return false;
1482
1483    initializeParserContext(chunkAsUtf8);
1484    xmlParseContent(context());
1485    endDocument(); // Close any open text nodes.
1486
1487    // FIXME: If this code is actually needed, it should probably move to finish()
1488    // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1489    // Check if all the chunk has been processed.
1490    long bytesProcessed = xmlByteConsumed(context());
1491    if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1492        // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
1493        // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1494        ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1495        return false;
1496    }
1497
1498    // No error if the chunk is well formed or it is not but we have no error.
1499    return context()->wellFormed || !xmlCtxtGetLastError(context());
1500}
1501
1502// --------------------------------
1503
1504struct AttributeParseState {
1505    HashMap<String, String> attributes;
1506    bool gotAttributes;
1507};
1508
1509static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1510                                            const xmlChar* /*xmlURI*/, int /*nb_namespaces*/, const xmlChar** /*namespaces*/,
1511                                            int nb_attributes, int /*nb_defaulted*/, const xmlChar** libxmlAttributes)
1512{
1513    if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1514        return;
1515
1516    xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1517    AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1518
1519    state->gotAttributes = true;
1520
1521    xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1522    for (int i = 0; i < nb_attributes; i++) {
1523        String attrLocalName = toString(attributes[i].localname);
1524        int valueLength = (int) (attributes[i].end - attributes[i].value);
1525        String attrValue = toString(attributes[i].value, valueLength);
1526        String attrPrefix = toString(attributes[i].prefix);
1527        String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1528
1529        state->attributes.set(attrQName, attrValue);
1530    }
1531}
1532
1533HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1534{
1535    String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1536
1537    AttributeParseState state;
1538    state.gotAttributes = false;
1539
1540    xmlSAXHandler sax;
1541    memset(&sax, 0, sizeof(sax));
1542    sax.startElementNs = attributesStartElementNsHandler;
1543    sax.initialized = XML_SAX2_MAGIC;
1544
1545    RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1546
1547    // FIXME: Can we parse 8-bit strings directly as Latin-1 instead of upconverting to UTF-16?
1548    xmlParseChunk(parser->context(), reinterpret_cast<const char*>(StringView(parseString).upconvertedCharacters().get()), parseString.length() * sizeof(UChar), 1);
1549
1550    attrsOK = state.gotAttributes;
1551    return WTF::move(state.attributes);
1552}
1553
1554}
1555