1/* 2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved. 3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include "config.h" 28#include "MarkupAccumulator.h" 29 30#include "CDATASection.h" 31#include "Comment.h" 32#include "DocumentFragment.h" 33#include "DocumentType.h" 34#include "Editor.h" 35#include "HTMLElement.h" 36#include "HTMLNames.h" 37#include "HTMLTemplateElement.h" 38#include "KURL.h" 39#include "ProcessingInstruction.h" 40#include "XLinkNames.h" 41#include "XMLNSNames.h" 42#include "XMLNames.h" 43#include <wtf/unicode/CharacterNames.h> 44 45namespace WebCore { 46 47using namespace HTMLNames; 48 49void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask) 50{ 51 DEFINE_STATIC_LOCAL(const String, ampReference, (ASCIILiteral("&"))); 52 DEFINE_STATIC_LOCAL(const String, ltReference, (ASCIILiteral("<"))); 53 DEFINE_STATIC_LOCAL(const String, gtReference, (ASCIILiteral(">"))); 54 DEFINE_STATIC_LOCAL(const String, quotReference, (ASCIILiteral("""))); 55 DEFINE_STATIC_LOCAL(const String, nbspReference, (ASCIILiteral(" "))); 56 57 static const EntityDescription entityMaps[] = { 58 { '&', ampReference, EntityAmp }, 59 { '<', ltReference, EntityLt }, 60 { '>', gtReference, EntityGt }, 61 { '"', quotReference, EntityQuot }, 62 { noBreakSpace, nbspReference, EntityNbsp }, 63 }; 64 65 if (!(offset + length)) 66 return; 67 68 ASSERT(offset + length <= source.length()); 69 70 if (source.is8Bit()) { 71 const LChar* text = source.characters8() + offset; 72 73 size_t positionAfterLastEntity = 0; 74 for (size_t i = 0; i < length; ++i) { 75 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) { 76 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 77 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 78 result.append(entityMaps[entityIndex].reference); 79 positionAfterLastEntity = i + 1; 80 break; 81 } 82 } 83 } 84 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 85 } else { 86 const UChar* text = source.characters16() + offset; 87 88 size_t positionAfterLastEntity = 0; 89 for (size_t i = 0; i < length; ++i) { 90 for (size_t entityIndex = 0; entityIndex < WTF_ARRAY_LENGTH(entityMaps); ++entityIndex) { 91 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) { 92 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity); 93 result.append(entityMaps[entityIndex].reference); 94 positionAfterLastEntity = i + 1; 95 break; 96 } 97 } 98 } 99 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity); 100 } 101} 102 103MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range) 104 : m_nodes(nodes) 105 , m_range(range) 106 , m_resolveURLsMethod(resolveUrlsMethod) 107{ 108} 109 110MarkupAccumulator::~MarkupAccumulator() 111{ 112} 113 114String MarkupAccumulator::serializeNodes(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly) 115{ 116 return serializeNodes(targetNode, nodeToSkip, childrenOnly, 0); 117} 118 119String MarkupAccumulator::serializeNodes(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip) 120{ 121 serializeNodesWithNamespaces(targetNode, nodeToSkip, childrenOnly, 0, tagNamesToSkip); 122 return m_markup.toString(); 123} 124 125void MarkupAccumulator::serializeNodesWithNamespaces(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip) 126{ 127 if (targetNode == nodeToSkip) 128 return; 129 130 if (tagNamesToSkip) { 131 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) { 132 if (targetNode->hasTagName(tagNamesToSkip->at(i))) 133 return; 134 } 135 } 136 137 Namespaces namespaceHash; 138 if (namespaces) 139 namespaceHash = *namespaces; 140 141 if (!childrenOnly) 142 appendStartTag(targetNode, &namespaceHash); 143 144 if (!(targetNode->document()->isHTMLDocument() && elementCannotHaveEndTag(targetNode))) { 145#if ENABLE(TEMPLATE_ELEMENT) 146 Node* current = targetNode->hasTagName(templateTag) ? toHTMLTemplateElement(targetNode)->content()->firstChild() : targetNode->firstChild(); 147#else 148 Node* current = targetNode->firstChild(); 149#endif 150 for ( ; current; current = current->nextSibling()) 151 serializeNodesWithNamespaces(current, nodeToSkip, IncludeNode, &namespaceHash, tagNamesToSkip); 152 } 153 154 if (!childrenOnly) 155 appendEndTag(targetNode); 156} 157 158String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const 159{ 160 switch (m_resolveURLsMethod) { 161 case ResolveAllURLs: 162 return element->document()->completeURL(urlString).string(); 163 164 case ResolveNonLocalURLs: 165 if (!element->document()->url().isLocalFile()) 166 return element->document()->completeURL(urlString).string(); 167 break; 168 169 case DoNotResolveURLs: 170 break; 171 } 172 return urlString; 173} 174 175void MarkupAccumulator::appendString(const String& string) 176{ 177 m_markup.append(string); 178} 179 180void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces) 181{ 182 appendStartMarkup(m_markup, node, namespaces); 183 if (m_nodes) 184 m_nodes->append(node); 185} 186 187void MarkupAccumulator::appendEndTag(Node* node) 188{ 189 appendEndMarkup(m_markup, node); 190} 191 192size_t MarkupAccumulator::totalLength(const Vector<String>& strings) 193{ 194 size_t length = 0; 195 for (size_t i = 0; i < strings.size(); ++i) 196 length += strings[i].length(); 197 return length; 198} 199 200void MarkupAccumulator::concatenateMarkup(StringBuilder& result) 201{ 202 result.append(m_markup); 203} 204 205void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML) 206{ 207 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(), 208 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue); 209} 210 211void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*) 212{ 213} 214 215void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute) 216{ 217 ASSERT(element->isURLAttribute(attribute)); 218 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value()); 219 UChar quoteChar = '"'; 220 String strippedURLString = resolvedURLString.stripWhiteSpace(); 221 if (protocolIsJavaScript(strippedURLString)) { 222 // minimal escaping for javascript urls 223 if (strippedURLString.contains('"')) { 224 if (strippedURLString.contains('\'')) 225 strippedURLString.replaceWithLiteral('"', """); 226 else 227 quoteChar = '\''; 228 } 229 result.append(quoteChar); 230 result.append(strippedURLString); 231 result.append(quoteChar); 232 return; 233 } 234 235 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML. 236 result.append(quoteChar); 237 appendAttributeValue(result, resolvedURLString, false); 238 result.append(quoteChar); 239} 240 241void MarkupAccumulator::appendNodeValue(StringBuilder& result, const Node* node, const Range* range, EntityMask entityMask) 242{ 243 const String str = node->nodeValue(); 244 unsigned length = str.length(); 245 unsigned start = 0; 246 247 if (range) { 248 if (node == range->endContainer()) 249 length = range->endOffset(); 250 if (node == range->startContainer()) { 251 start = range->startOffset(); 252 length -= start; 253 } 254 } 255 256 appendCharactersReplacingEntities(result, str, start, length, entityMask); 257} 258 259bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element) 260{ 261 // Don't add namespace attribute if it is already defined for this elem. 262 const AtomicString& prefix = element->prefix(); 263 if (prefix.isEmpty()) 264 return !element->hasAttribute(xmlnsAtom); 265 266 DEFINE_STATIC_LOCAL(String, xmlnsWithColon, (ASCIILiteral("xmlns:"))); 267 return !element->hasAttribute(xmlnsWithColon + prefix); 268} 269 270bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces) 271{ 272 namespaces.checkConsistency(); 273 274 // Don't add namespace attributes twice 275 if (attribute.name() == XMLNSNames::xmlnsAttr) { 276 namespaces.set(emptyAtom.impl(), attribute.value().impl()); 277 return false; 278 } 279 280 QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI); 281 if (attribute.name() == xmlnsPrefixAttr) { 282 namespaces.set(attribute.localName().impl(), attribute.value().impl()); 283 return false; 284 } 285 286 return true; 287} 288 289void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces) 290{ 291 namespaces.checkConsistency(); 292 if (namespaceURI.isEmpty()) 293 return; 294 295 // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key 296 AtomicStringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl(); 297 AtomicStringImpl* foundNS = namespaces.get(pre); 298 if (foundNS != namespaceURI.impl()) { 299 namespaces.set(pre, namespaceURI.impl()); 300 result.append(' '); 301 result.append(xmlnsAtom.string()); 302 if (!prefix.isEmpty()) { 303 result.append(':'); 304 result.append(prefix); 305 } 306 307 result.append('='); 308 result.append('"'); 309 appendAttributeValue(result, namespaceURI, false); 310 result.append('"'); 311 } 312} 313 314EntityMask MarkupAccumulator::entityMaskForText(Text* text) const 315{ 316 const QualifiedName* parentName = 0; 317 if (text->parentElement()) 318 parentName = &(text->parentElement())->tagQName(); 319 320 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag)) 321 return EntityMaskInCDATA; 322 323 return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA; 324} 325 326void MarkupAccumulator::appendText(StringBuilder& result, Text* text) 327{ 328 appendNodeValue(result, text, m_range, entityMaskForText(text)); 329} 330 331void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment) 332{ 333 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->". 334 result.appendLiteral("<!--"); 335 result.append(comment); 336 result.appendLiteral("-->"); 337} 338 339void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document* document) 340{ 341 if (!document->hasXMLDeclaration()) 342 return; 343 344 result.appendLiteral("<?xml version=\""); 345 result.append(document->xmlVersion()); 346 const String& encoding = document->xmlEncoding(); 347 if (!encoding.isEmpty()) { 348 result.appendLiteral("\" encoding=\""); 349 result.append(encoding); 350 } 351 if (document->xmlStandaloneStatus() != Document::StandaloneUnspecified) { 352 result.appendLiteral("\" standalone=\""); 353 if (document->xmlStandalone()) 354 result.appendLiteral("yes"); 355 else 356 result.appendLiteral("no"); 357 } 358 359 result.appendLiteral("\"?>"); 360} 361 362void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n) 363{ 364 if (n->name().isEmpty()) 365 return; 366 367 result.appendLiteral("<!DOCTYPE "); 368 result.append(n->name()); 369 if (!n->publicId().isEmpty()) { 370 result.appendLiteral(" PUBLIC \""); 371 result.append(n->publicId()); 372 result.append('"'); 373 if (!n->systemId().isEmpty()) { 374 result.append(' '); 375 result.append('"'); 376 result.append(n->systemId()); 377 result.append('"'); 378 } 379 } else if (!n->systemId().isEmpty()) { 380 result.appendLiteral(" SYSTEM \""); 381 result.append(n->systemId()); 382 result.append('"'); 383 } 384 if (!n->internalSubset().isEmpty()) { 385 result.append(' '); 386 result.append('['); 387 result.append(n->internalSubset()); 388 result.append(']'); 389 } 390 result.append('>'); 391} 392 393void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data) 394{ 395 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>". 396 result.append('<'); 397 result.append('?'); 398 result.append(target); 399 result.append(' '); 400 result.append(data); 401 result.append('?'); 402 result.append('>'); 403} 404 405void MarkupAccumulator::appendElement(StringBuilder& result, Element* element, Namespaces* namespaces) 406{ 407 appendOpenTag(result, element, namespaces); 408 409 if (element->hasAttributes()) { 410 unsigned length = element->attributeCount(); 411 for (unsigned int i = 0; i < length; i++) 412 appendAttribute(result, element, *element->attributeItem(i), namespaces); 413 } 414 415 // Give an opportunity to subclasses to add their own attributes. 416 appendCustomAttributes(result, element, namespaces); 417 418 appendCloseTag(result, element); 419} 420 421void MarkupAccumulator::appendOpenTag(StringBuilder& result, Element* element, Namespaces* namespaces) 422{ 423 result.append('<'); 424 result.append(element->nodeNamePreservingCase()); 425 if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element)) 426 appendNamespace(result, element->prefix(), element->namespaceURI(), *namespaces); 427} 428 429void MarkupAccumulator::appendCloseTag(StringBuilder& result, Element* element) 430{ 431 if (shouldSelfClose(element)) { 432 if (element->isHTMLElement()) 433 result.append(' '); // XHTML 1.0 <-> HTML compatibility. 434 result.append('/'); 435 } 436 result.append('>'); 437} 438 439static inline bool attributeIsInSerializedNamespace(const Attribute& attribute) 440{ 441 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI 442 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI 443 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI; 444} 445 446void MarkupAccumulator::appendAttribute(StringBuilder& result, Element* element, const Attribute& attribute, Namespaces* namespaces) 447{ 448 bool documentIsHTML = element->document()->isHTMLDocument(); 449 450 result.append(' '); 451 452 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) 453 result.append(attribute.name().localName()); 454 else { 455 QualifiedName prefixedName = attribute.name(); 456 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) { 457 if (!attribute.prefix()) 458 prefixedName.setPrefix(xlinkAtom); 459 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) { 460 if (!attribute.prefix()) 461 prefixedName.setPrefix(xmlAtom); 462 } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) { 463 if (attribute.name() != XMLNSNames::xmlnsAttr && !attribute.prefix()) 464 prefixedName.setPrefix(xmlnsAtom); 465 } 466 result.append(prefixedName.toString()); 467 } 468 469 result.append('='); 470 471 if (element->isURLAttribute(attribute)) 472 appendQuotedURLAttributeValue(result, element, attribute); 473 else { 474 result.append('"'); 475 appendAttributeValue(result, attribute.value(), documentIsHTML); 476 result.append('"'); 477 } 478 479 if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces)) 480 appendNamespace(result, attribute.prefix(), attribute.namespaceURI(), *namespaces); 481} 482 483void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section) 484{ 485 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>". 486 result.appendLiteral("<![CDATA["); 487 result.append(section); 488 result.appendLiteral("]]>"); 489} 490 491void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces) 492{ 493 if (namespaces) 494 namespaces->checkConsistency(); 495 496 switch (node->nodeType()) { 497 case Node::TEXT_NODE: 498 appendText(result, toText(const_cast<Node*>(node))); 499 break; 500 case Node::COMMENT_NODE: 501 appendComment(result, static_cast<const Comment*>(node)->data()); 502 break; 503 case Node::DOCUMENT_NODE: 504 appendXMLDeclaration(result, toDocument(node)); 505 break; 506 case Node::DOCUMENT_FRAGMENT_NODE: 507 break; 508 case Node::DOCUMENT_TYPE_NODE: 509 appendDocumentType(result, static_cast<const DocumentType*>(node)); 510 break; 511 case Node::PROCESSING_INSTRUCTION_NODE: 512 appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data()); 513 break; 514 case Node::ELEMENT_NODE: 515 appendElement(result, toElement(const_cast<Node*>(node)), namespaces); 516 break; 517 case Node::CDATA_SECTION_NODE: 518 appendCDATASection(result, static_cast<const CDATASection*>(node)->data()); 519 break; 520 case Node::ATTRIBUTE_NODE: 521 case Node::ENTITY_NODE: 522 case Node::ENTITY_REFERENCE_NODE: 523 case Node::NOTATION_NODE: 524 case Node::XPATH_NAMESPACE_NODE: 525 ASSERT_NOT_REACHED(); 526 break; 527 } 528} 529 530// Rules of self-closure 531// 1. No elements in HTML documents use the self-closing syntax. 532// 2. Elements w/ children never self-close because they use a separate end tag. 533// 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag. 534// 4. Other elements self-close. 535bool MarkupAccumulator::shouldSelfClose(const Node* node) 536{ 537 if (node->document()->isHTMLDocument()) 538 return false; 539 if (node->hasChildNodes()) 540 return false; 541 if (node->isHTMLElement() && !elementCannotHaveEndTag(node)) 542 return false; 543 return true; 544} 545 546bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node) 547{ 548 if (!node->isHTMLElement()) 549 return false; 550 551 // FIXME: ieForbidsInsertHTML may not be the right function to call here 552 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML 553 // or createContextualFragment. It does not necessarily align with 554 // which elements should be serialized w/o end tags. 555 return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML(); 556} 557 558void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node) 559{ 560 if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node))) 561 return; 562 563 result.append('<'); 564 result.append('/'); 565 result.append(toElement(node)->nodeNamePreservingCase()); 566 result.append('>'); 567} 568 569} 570