1/*
2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "MarkupAccumulator.h"
29
30#include "CDATASection.h"
31#include "Comment.h"
32#include "DocumentFragment.h"
33#include "DocumentType.h"
34#include "Editor.h"
35#include "HTMLElement.h"
36#include "HTMLNames.h"
37#include "HTMLTemplateElement.h"
38#include "KURL.h"
39#include "ProcessingInstruction.h"
40#include "XLinkNames.h"
41#include "XMLNSNames.h"
42#include "XMLNames.h"
43#include <wtf/unicode/CharacterNames.h>
44
45namespace WebCore {
46
47using namespace HTMLNames;
48
49void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask)
50{
51    DEFINE_STATIC_LOCAL(const String, ampReference, (ASCIILiteral("&amp;")));
52    DEFINE_STATIC_LOCAL(const String, ltReference, (ASCIILiteral("&lt;")));
53    DEFINE_STATIC_LOCAL(const String, gtReference, (ASCIILiteral("&gt;")));
54    DEFINE_STATIC_LOCAL(const String, quotReference, (ASCIILiteral("&quot;")));
55    DEFINE_STATIC_LOCAL(const String, nbspReference, (ASCIILiteral("&nbsp;")));
56
57    static const EntityDescription entityMaps[] = {
58        { '&', ampReference, EntityAmp },
59        { '<', ltReference, EntityLt },
60        { '>', gtReference, EntityGt },
61        { '"', quotReference, EntityQuot },
62        { noBreakSpace, nbspReference, EntityNbsp },
63    };
64
65    if (!(offset + length))
66        return;
67
68    ASSERT(offset + length <= source.length());
69
70    if (source.is8Bit()) {
71        const LChar* text = source.characters8() + offset;
72
73        size_t positionAfterLastEntity = 0;
74        for (size_t i = 0; i < length; ++i) {
75            for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) {
76                if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
77                    result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
78                    result.append(entityMaps[entityIndex].reference);
79                    positionAfterLastEntity = i + 1;
80                    break;
81                }
82            }
83        }
84        result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
85    } else {
86        const UChar* text = source.characters16() + offset;
87
88        size_t positionAfterLastEntity = 0;
89        for (size_t i = 0; i < length; ++i) {
90            for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) {
91                if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
92                    result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
93                    result.append(entityMaps[entityIndex].reference);
94                    positionAfterLastEntity = i + 1;
95                    break;
96                }
97            }
98        }
99        result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
100    }
101}
102
103MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range)
104    : m_nodes(nodes)
105    , m_range(range)
106    , m_resolveURLsMethod(resolveUrlsMethod)
107{
108}
109
110MarkupAccumulator::~MarkupAccumulator()
111{
112}
113
114String MarkupAccumulator::serializeNodes(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly)
115{
116    return serializeNodes(targetNode, nodeToSkip, childrenOnly, 0);
117}
118
119String MarkupAccumulator::serializeNodes(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip)
120{
121    serializeNodesWithNamespaces(targetNode, nodeToSkip, childrenOnly, 0, tagNamesToSkip);
122    return m_markup.toString();
123}
124
125void MarkupAccumulator::serializeNodesWithNamespaces(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip)
126{
127    if (targetNode == nodeToSkip)
128        return;
129
130    if (tagNamesToSkip) {
131        for (size_t i = 0; i < tagNamesToSkip->size(); ++i) {
132            if (targetNode->hasTagName(tagNamesToSkip->at(i)))
133                return;
134        }
135    }
136
137    Namespaces namespaceHash;
138    if (namespaces)
139        namespaceHash = *namespaces;
140
141    if (!childrenOnly)
142        appendStartTag(targetNode, &namespaceHash);
143
144    if (!(targetNode->document()->isHTMLDocument() && elementCannotHaveEndTag(targetNode))) {
145#if ENABLE(TEMPLATE_ELEMENT)
146        Node* current = targetNode->hasTagName(templateTag) ? toHTMLTemplateElement(targetNode)->content()->firstChild() : targetNode->firstChild();
147#else
148        Node* current = targetNode->firstChild();
149#endif
150        for ( ; current; current = current->nextSibling())
151            serializeNodesWithNamespaces(current, nodeToSkip, IncludeNode, &namespaceHash, tagNamesToSkip);
152    }
153
154    if (!childrenOnly)
155        appendEndTag(targetNode);
156}
157
158String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const
159{
160    switch (m_resolveURLsMethod) {
161    case ResolveAllURLs:
162        return element->document()->completeURL(urlString).string();
163
164    case ResolveNonLocalURLs:
165        if (!element->document()->url().isLocalFile())
166            return element->document()->completeURL(urlString).string();
167        break;
168
169    case DoNotResolveURLs:
170        break;
171    }
172    return urlString;
173}
174
175void MarkupAccumulator::appendString(const String& string)
176{
177    m_markup.append(string);
178}
179
180void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces)
181{
182    appendStartMarkup(m_markup, node, namespaces);
183    if (m_nodes)
184        m_nodes->append(node);
185}
186
187void MarkupAccumulator::appendEndTag(Node* node)
188{
189    appendEndMarkup(m_markup, node);
190}
191
192size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
193{
194    size_t length = 0;
195    for (size_t i = 0; i < strings.size(); ++i)
196        length += strings[i].length();
197    return length;
198}
199
200void MarkupAccumulator::concatenateMarkup(StringBuilder& result)
201{
202    result.append(m_markup);
203}
204
205void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML)
206{
207    appendCharactersReplacingEntities(result, attribute, 0, attribute.length(),
208        documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
209}
210
211void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*)
212{
213}
214
215void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute)
216{
217    ASSERT(element->isURLAttribute(attribute));
218    const String resolvedURLString = resolveURLIfNeeded(element, attribute.value());
219    UChar quoteChar = '"';
220    String strippedURLString = resolvedURLString.stripWhiteSpace();
221    if (protocolIsJavaScript(strippedURLString)) {
222        // minimal escaping for javascript urls
223        if (strippedURLString.contains('"')) {
224            if (strippedURLString.contains('\''))
225                strippedURLString.replaceWithLiteral('"', "&quot;");
226            else
227                quoteChar = '\'';
228        }
229        result.append(quoteChar);
230        result.append(strippedURLString);
231        result.append(quoteChar);
232        return;
233    }
234
235    // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
236    result.append(quoteChar);
237    appendAttributeValue(result, resolvedURLString, false);
238    result.append(quoteChar);
239}
240
241void MarkupAccumulator::appendNodeValue(StringBuilder& result, const Node* node, const Range* range, EntityMask entityMask)
242{
243    const String str = node->nodeValue();
244    unsigned length = str.length();
245    unsigned start = 0;
246
247    if (range) {
248        if (node == range->endContainer())
249            length = range->endOffset();
250        if (node == range->startContainer()) {
251            start = range->startOffset();
252            length -= start;
253        }
254    }
255
256    appendCharactersReplacingEntities(result, str, start, length, entityMask);
257}
258
259bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element)
260{
261    // Don't add namespace attribute if it is already defined for this elem.
262    const AtomicString& prefix = element->prefix();
263    if (prefix.isEmpty())
264        return !element->hasAttribute(xmlnsAtom);
265
266    DEFINE_STATIC_LOCAL(String, xmlnsWithColon, (ASCIILiteral("xmlns:")));
267    return !element->hasAttribute(xmlnsWithColon + prefix);
268}
269
270bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces)
271{
272    namespaces.checkConsistency();
273
274    // Don't add namespace attributes twice
275    if (attribute.name() == XMLNSNames::xmlnsAttr) {
276        namespaces.set(emptyAtom.impl(), attribute.value().impl());
277        return false;
278    }
279
280    QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI);
281    if (attribute.name() == xmlnsPrefixAttr) {
282        namespaces.set(attribute.localName().impl(), attribute.value().impl());
283        return false;
284    }
285
286    return true;
287}
288
289void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
290{
291    namespaces.checkConsistency();
292    if (namespaceURI.isEmpty())
293        return;
294
295    // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key
296    AtomicStringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl();
297    AtomicStringImpl* foundNS = namespaces.get(pre);
298    if (foundNS != namespaceURI.impl()) {
299        namespaces.set(pre, namespaceURI.impl());
300        result.append(' ');
301        result.append(xmlnsAtom.string());
302        if (!prefix.isEmpty()) {
303            result.append(':');
304            result.append(prefix);
305        }
306
307        result.append('=');
308        result.append('"');
309        appendAttributeValue(result, namespaceURI, false);
310        result.append('"');
311    }
312}
313
314EntityMask MarkupAccumulator::entityMaskForText(Text* text) const
315{
316    const QualifiedName* parentName = 0;
317    if (text->parentElement())
318        parentName = &(text->parentElement())->tagQName();
319
320    if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
321        return EntityMaskInCDATA;
322
323    return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA;
324}
325
326void MarkupAccumulator::appendText(StringBuilder& result, Text* text)
327{
328    appendNodeValue(result, text, m_range, entityMaskForText(text));
329}
330
331void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment)
332{
333    // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
334    result.appendLiteral("<!--");
335    result.append(comment);
336    result.appendLiteral("-->");
337}
338
339void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document* document)
340{
341    if (!document->hasXMLDeclaration())
342        return;
343
344    result.appendLiteral("<?xml version=\"");
345    result.append(document->xmlVersion());
346    const String& encoding = document->xmlEncoding();
347    if (!encoding.isEmpty()) {
348        result.appendLiteral("\" encoding=\"");
349        result.append(encoding);
350    }
351    if (document->xmlStandaloneStatus() != Document::StandaloneUnspecified) {
352        result.appendLiteral("\" standalone=\"");
353        if (document->xmlStandalone())
354            result.appendLiteral("yes");
355        else
356            result.appendLiteral("no");
357    }
358
359    result.appendLiteral("\"?>");
360}
361
362void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n)
363{
364    if (n->name().isEmpty())
365        return;
366
367    result.appendLiteral("<!DOCTYPE ");
368    result.append(n->name());
369    if (!n->publicId().isEmpty()) {
370        result.appendLiteral(" PUBLIC \"");
371        result.append(n->publicId());
372        result.append('"');
373        if (!n->systemId().isEmpty()) {
374            result.append(' ');
375            result.append('"');
376            result.append(n->systemId());
377            result.append('"');
378        }
379    } else if (!n->systemId().isEmpty()) {
380        result.appendLiteral(" SYSTEM \"");
381        result.append(n->systemId());
382        result.append('"');
383    }
384    if (!n->internalSubset().isEmpty()) {
385        result.append(' ');
386        result.append('[');
387        result.append(n->internalSubset());
388        result.append(']');
389    }
390    result.append('>');
391}
392
393void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data)
394{
395    // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>".
396    result.append('<');
397    result.append('?');
398    result.append(target);
399    result.append(' ');
400    result.append(data);
401    result.append('?');
402    result.append('>');
403}
404
405void MarkupAccumulator::appendElement(StringBuilder& result, Element* element, Namespaces* namespaces)
406{
407    appendOpenTag(result, element, namespaces);
408
409    if (element->hasAttributes()) {
410        unsigned length = element->attributeCount();
411        for (unsigned int i = 0; i < length; i++)
412            appendAttribute(result, element, *element->attributeItem(i), namespaces);
413    }
414
415    // Give an opportunity to subclasses to add their own attributes.
416    appendCustomAttributes(result, element, namespaces);
417
418    appendCloseTag(result, element);
419}
420
421void MarkupAccumulator::appendOpenTag(StringBuilder& result, Element* element, Namespaces* namespaces)
422{
423    result.append('<');
424    result.append(element->nodeNamePreservingCase());
425    if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element))
426        appendNamespace(result, element->prefix(), element->namespaceURI(), *namespaces);
427}
428
429void MarkupAccumulator::appendCloseTag(StringBuilder& result, Element* element)
430{
431    if (shouldSelfClose(element)) {
432        if (element->isHTMLElement())
433            result.append(' '); // XHTML 1.0 <-> HTML compatibility.
434        result.append('/');
435    }
436    result.append('>');
437}
438
439static inline bool attributeIsInSerializedNamespace(const Attribute& attribute)
440{
441    return attribute.namespaceURI() == XMLNames::xmlNamespaceURI
442        || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI
443        || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI;
444}
445
446void MarkupAccumulator::appendAttribute(StringBuilder& result, Element* element, const Attribute& attribute, Namespaces* namespaces)
447{
448    bool documentIsHTML = element->document()->isHTMLDocument();
449
450    result.append(' ');
451
452    if (documentIsHTML && !attributeIsInSerializedNamespace(attribute))
453        result.append(attribute.name().localName());
454    else {
455        QualifiedName prefixedName = attribute.name();
456        if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) {
457            if (!attribute.prefix())
458                prefixedName.setPrefix(xlinkAtom);
459        } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) {
460            if (!attribute.prefix())
461                prefixedName.setPrefix(xmlAtom);
462        } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) {
463            if (attribute.name() != XMLNSNames::xmlnsAttr && !attribute.prefix())
464                prefixedName.setPrefix(xmlnsAtom);
465        }
466        result.append(prefixedName.toString());
467    }
468
469    result.append('=');
470
471    if (element->isURLAttribute(attribute))
472        appendQuotedURLAttributeValue(result, element, attribute);
473    else {
474        result.append('"');
475        appendAttributeValue(result, attribute.value(), documentIsHTML);
476        result.append('"');
477    }
478
479    if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces))
480        appendNamespace(result, attribute.prefix(), attribute.namespaceURI(), *namespaces);
481}
482
483void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section)
484{
485    // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
486    result.appendLiteral("<![CDATA[");
487    result.append(section);
488    result.appendLiteral("]]>");
489}
490
491void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces)
492{
493    if (namespaces)
494        namespaces->checkConsistency();
495
496    switch (node->nodeType()) {
497    case Node::TEXT_NODE:
498        appendText(result, toText(const_cast<Node*>(node)));
499        break;
500    case Node::COMMENT_NODE:
501        appendComment(result, static_cast<const Comment*>(node)->data());
502        break;
503    case Node::DOCUMENT_NODE:
504        appendXMLDeclaration(result, toDocument(node));
505        break;
506    case Node::DOCUMENT_FRAGMENT_NODE:
507        break;
508    case Node::DOCUMENT_TYPE_NODE:
509        appendDocumentType(result, static_cast<const DocumentType*>(node));
510        break;
511    case Node::PROCESSING_INSTRUCTION_NODE:
512        appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data());
513        break;
514    case Node::ELEMENT_NODE:
515        appendElement(result, toElement(const_cast<Node*>(node)), namespaces);
516        break;
517    case Node::CDATA_SECTION_NODE:
518        appendCDATASection(result, static_cast<const CDATASection*>(node)->data());
519        break;
520    case Node::ATTRIBUTE_NODE:
521    case Node::ENTITY_NODE:
522    case Node::ENTITY_REFERENCE_NODE:
523    case Node::NOTATION_NODE:
524    case Node::XPATH_NAMESPACE_NODE:
525        ASSERT_NOT_REACHED();
526        break;
527    }
528}
529
530// Rules of self-closure
531// 1. No elements in HTML documents use the self-closing syntax.
532// 2. Elements w/ children never self-close because they use a separate end tag.
533// 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
534// 4. Other elements self-close.
535bool MarkupAccumulator::shouldSelfClose(const Node* node)
536{
537    if (node->document()->isHTMLDocument())
538        return false;
539    if (node->hasChildNodes())
540        return false;
541    if (node->isHTMLElement() && !elementCannotHaveEndTag(node))
542        return false;
543    return true;
544}
545
546bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node)
547{
548    if (!node->isHTMLElement())
549        return false;
550
551    // FIXME: ieForbidsInsertHTML may not be the right function to call here
552    // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
553    // or createContextualFragment.  It does not necessarily align with
554    // which elements should be serialized w/o end tags.
555    return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML();
556}
557
558void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node)
559{
560    if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node)))
561        return;
562
563    result.append('<');
564    result.append('/');
565    result.append(toElement(node)->nodeNamePreservingCase());
566    result.append('>');
567}
568
569}
570