1/* 2 * Copyright (C) 2000 Peter Kelly (pmk@post.com) 3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved. 4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org) 5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org) 6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies) 7 * Copyright (C) 2008 Holger Hans Peter Freyther 8 * Copyright (C) 2008, 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 9 * 10 * This library is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU Library General Public 12 * License as published by the Free Software Foundation; either 13 * version 2 of the License, or (at your option) any later version. 14 * 15 * This library is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * Library General Public License for more details. 19 * 20 * You should have received a copy of the GNU Library General Public License 21 * along with this library; see the file COPYING.LIB. If not, write to 22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 23 * Boston, MA 02110-1301, USA. 24 */ 25 26#include "config.h" 27#include "XMLDocumentParser.h" 28 29#include "CDATASection.h" 30#include "CachedScript.h" 31#include "Comment.h" 32#include "CachedResourceLoader.h" 33#include "Document.h" 34#include "DocumentFragment.h" 35#include "DocumentType.h" 36#include "ExceptionCodePlaceholder.h" 37#include "Frame.h" 38#include "FrameLoader.h" 39#include "FrameView.h" 40#include "HTMLEntityParser.h" 41#include "HTMLHtmlElement.h" 42#include "HTMLLinkElement.h" 43#include "HTMLNames.h" 44#include "HTMLStyleElement.h" 45#include "ProcessingInstruction.h" 46#include "ResourceError.h" 47#include "ResourceHandle.h" 48#include "ResourceRequest.h" 49#include "ResourceResponse.h" 50#include "ScriptableDocumentParser.h" 51#include "ScriptElement.h" 52#include "ScriptSourceCode.h" 53#include "ScriptValue.h" 54#include "TextResourceDecoder.h" 55#include "TransformSource.h" 56#include "XMLNSNames.h" 57#include <QDebug> 58#include <wtf/StringExtras.h> 59#include <wtf/Threading.h> 60#include <wtf/Vector.h> 61#include <wtf/text/CString.h> 62 63using namespace std; 64 65namespace WebCore { 66 67static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy) 68{ 69 if (!scriptingContentIsAllowed(parserContentPolicy)) 70 element->stripScriptingAttributes(attributeVector); 71 element->parserSetAttributes(attributeVector); 72} 73 74class EntityResolver : public QXmlStreamEntityResolver { 75 virtual QString resolveUndeclaredEntity(const QString &name); 76}; 77 78static QString decodeNamedEntity(const QString& entityName) 79{ 80 UChar utf16DecodedEntity[4]; 81 size_t numberOfCodePoints = decodeNamedEntityToUCharArray(entityName.toUtf8().constData(), utf16DecodedEntity); 82 return QString(reinterpret_cast<const QChar*>(utf16DecodedEntity), numberOfCodePoints); 83} 84 85QString EntityResolver::resolveUndeclaredEntity(const QString &name) 86{ 87 return decodeNamedEntity(name); 88} 89 90// -------------------------------- 91 92bool XMLDocumentParser::supportsXMLVersion(const String& version) 93{ 94 return version == "1.0"; 95} 96 97XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView) 98 : ScriptableDocumentParser(document) 99 , m_view(frameView) 100 , m_wroteText(false) 101 , m_currentNode(document) 102 , m_sawError(false) 103 , m_sawCSS(false) 104 , m_sawXSLTransform(false) 105 , m_sawFirstElement(false) 106 , m_isXHTMLDocument(false) 107 , m_parserPaused(false) 108 , m_requestingScript(false) 109 , m_finishCalled(false) 110 , m_xmlErrors(document) 111 , m_pendingScript(0) 112 , m_scriptStartPosition(TextPosition::belowRangePosition()) 113 , m_parsingFragment(false) 114{ 115 m_stream.setEntityResolver(new EntityResolver); 116} 117 118XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy) 119 : ScriptableDocumentParser(fragment->document(), parserContentPolicy) 120 , m_view(0) 121 , m_wroteText(false) 122 , m_currentNode(fragment) 123 , m_sawError(false) 124 , m_sawCSS(false) 125 , m_sawXSLTransform(false) 126 , m_sawFirstElement(false) 127 , m_isXHTMLDocument(false) 128 , m_parserPaused(false) 129 , m_requestingScript(false) 130 , m_finishCalled(false) 131 , m_xmlErrors(fragment->document()) 132 , m_pendingScript(0) 133 , m_scriptStartPosition(TextPosition::belowRangePosition()) 134 , m_parsingFragment(true) 135{ 136 fragment->ref(); 137 138 // Add namespaces based on the parent node 139 Vector<Element*> elemStack; 140 while (parentElement) { 141 elemStack.append(parentElement); 142 143 Node* n = parentElement->parentNode(); 144 if (!n || !n->isElementNode()) 145 break; 146 parentElement = toElement(n); 147 } 148 149 if (elemStack.isEmpty()) 150 return; 151 152 QXmlStreamNamespaceDeclarations namespaces; 153 for (Element* element = elemStack.last(); !elemStack.isEmpty(); elemStack.removeLast()) { 154 element->synchronizeAllAttributes(); 155 if (const ElementData* attrs = element->elementData()) { 156 for (unsigned i = 0; i < attrs->length(); i++) { 157 const Attribute* attr = attrs->attributeItem(i); 158 if (attr->localName() == "xmlns") 159 m_defaultNamespaceURI = attr->value(); 160 else if (attr->prefix() == "xmlns") 161 namespaces.append(QXmlStreamNamespaceDeclaration(attr->localName(), attr->value())); 162 } 163 } 164 } 165 m_stream.addExtraNamespaceDeclarations(namespaces); 166 m_stream.setEntityResolver(new EntityResolver); 167 168 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace. 169 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument()) 170 m_defaultNamespaceURI = parentElement->namespaceURI(); 171} 172 173XMLDocumentParser::~XMLDocumentParser() 174{ 175 clearCurrentNodeStack(); 176 if (m_pendingScript) 177 m_pendingScript->removeClient(this); 178 delete m_stream.entityResolver(); 179} 180 181void XMLDocumentParser::doWrite(const String& parseString) 182{ 183 m_wroteText = true; 184 185 if (document()->decoder() && document()->decoder()->sawError()) { 186 // If the decoder saw an error, report it as fatal (stops parsing) 187 handleError(XMLErrors::fatal, "Encoding error", textPosition()); 188 return; 189 } 190 191 QString data(parseString); 192 if (!data.isEmpty()) { 193 // JavaScript may cause the parser to detach, 194 // keep this alive until this function is done. 195 RefPtr<XMLDocumentParser> protect(this); 196 197 m_stream.addData(data); 198 parse(); 199 } 200 201 return; 202} 203 204void XMLDocumentParser::initializeParserContext(const CString&) 205{ 206 DocumentParser::startParsing(); 207 m_sawError = false; 208 m_sawCSS = false; 209 m_sawXSLTransform = false; 210 m_sawFirstElement = false; 211} 212 213void XMLDocumentParser::doEnd() 214{ 215#if ENABLE(XSLT) 216 if (m_sawXSLTransform) { 217 document()->setTransformSource(adoptPtr(new TransformSource(m_originalSourceForTransform.toString()))); 218 document()->setParsing(false); // Make the doc think it's done, so it will apply xsl sheets. 219 document()->styleResolverChanged(RecalcStyleImmediately); 220 221 // styleResolverChanged() call can detach the parser and null out its document. 222 // In that case, we just bail out. 223 if (isDetached()) 224 return; 225 226 document()->setParsing(true); 227 DocumentParser::stopParsing(); 228 } 229#endif 230 231 if (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError 232 || (m_wroteText && !m_sawFirstElement && !m_sawXSLTransform && !m_sawError)) 233 handleError(XMLErrors::fatal, qPrintable(m_stream.errorString()), textPosition()); 234} 235 236OrdinalNumber XMLDocumentParser::lineNumber() const 237{ 238 return OrdinalNumber::fromOneBasedInt(m_stream.lineNumber()); 239} 240 241OrdinalNumber XMLDocumentParser::columnNumber() const 242{ 243 return OrdinalNumber::fromOneBasedInt(m_stream.columnNumber()); 244} 245 246TextPosition XMLDocumentParser::textPosition() const 247{ 248 return TextPosition(lineNumber(), columnNumber()); 249} 250 251void XMLDocumentParser::stopParsing() 252{ 253 ScriptableDocumentParser::stopParsing(); 254} 255 256void XMLDocumentParser::resumeParsing() 257{ 258 ASSERT(m_parserPaused); 259 260 m_parserPaused = false; 261 262 // First, execute any pending callbacks 263 parse(); 264 if (m_parserPaused) 265 return; 266 267 // Then, write any pending data 268 SegmentedString rest = m_pendingSrc; 269 m_pendingSrc.clear(); 270 append(rest.toString().impl()); 271 272 // Finally, if finish() has been called and append() didn't result 273 // in any further callbacks being queued, call end() 274 if (m_finishCalled && !m_parserPaused && !m_pendingScript) 275 end(); 276} 277 278bool XMLDocumentParser::appendFragmentSource(const String& source) 279{ 280 ASSERT(!m_sawFirstElement); 281 append(String("<qxmlstreamdummyelement>").impl()); 282 append(source.impl()); 283 append(String("</qxmlstreamdummyelement>").impl()); 284 return !hasError(); 285} 286 287// -------------------------------- 288 289struct AttributeParseState { 290 HashMap<String, String> attributes; 291 bool gotAttributes; 292}; 293 294static void attributesStartElementNsHandler(AttributeParseState* state, const QXmlStreamAttributes& attrs) 295{ 296 if (attrs.count() <= 0) 297 return; 298 299 state->gotAttributes = true; 300 301 for (int i = 0; i < attrs.count(); i++) { 302 const QXmlStreamAttribute& attr = attrs[i]; 303 String attrLocalName = attr.name(); 304 String attrValue = attr.value(); 305 String attrURI = attr.namespaceUri(); 306 String attrQName = attr.qualifiedName(); 307 state->attributes.set(attrQName, attrValue); 308 } 309} 310 311HashMap<String, String> parseAttributes(const String& string, bool& attrsOK) 312{ 313 AttributeParseState state; 314 state.gotAttributes = false; 315 316 QXmlStreamReader stream; 317 QString dummy = QString(QLatin1String("<?xml version=\"1.0\"?><attrs %1 />")).arg(string); 318 stream.addData(dummy); 319 while (!stream.atEnd()) { 320 stream.readNext(); 321 if (stream.isStartElement()) { 322 attributesStartElementNsHandler(&state, stream.attributes()); 323 } 324 } 325 attrsOK = state.gotAttributes; 326 return state.attributes; 327} 328 329static inline String prefixFromQName(const QString& qName) 330{ 331 const int offset = qName.indexOf(QLatin1Char(':')); 332 if (offset <= 0) 333 return String(); 334 else 335 return qName.left(offset); 336} 337 338static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const QXmlStreamNamespaceDeclarations &ns, ExceptionCode& ec) 339{ 340 for (int i = 0; i < ns.count(); ++i) { 341 const QXmlStreamNamespaceDeclaration &decl = ns[i]; 342 String namespaceURI = decl.namespaceUri(); 343 String namespaceQName = decl.prefix().isEmpty() ? String("xmlns") : String("xmlns:"); 344 namespaceQName.append(decl.prefix()); 345 346 QualifiedName parsedName = anyName; 347 if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, ec)) 348 return; 349 350 prefixedAttributes.append(Attribute(parsedName, namespaceURI)); 351 } 352} 353 354static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const QXmlStreamAttributes &attrs, ExceptionCode& ec) 355{ 356 for (int i = 0; i < attrs.count(); ++i) { 357 const QXmlStreamAttribute &attr = attrs[i]; 358 String attrLocalName = attr.name(); 359 String attrValue = attr.value(); 360 String attrURI = attr.namespaceUri().isEmpty() ? String() : String(attr.namespaceUri()); 361 String attrQName = attr.qualifiedName(); 362 363 QualifiedName parsedName = anyName; 364 if (!Element::parseAttributeName(parsedName, attrURI, attrQName, ec)) 365 return; 366 367 prefixedAttributes.append(Attribute(parsedName, attrValue)); 368 } 369} 370 371void XMLDocumentParser::parse() 372{ 373 while (!isStopped() && !m_parserPaused && !m_stream.atEnd()) { 374 m_stream.readNext(); 375 switch (m_stream.tokenType()) { 376 case QXmlStreamReader::StartDocument: 377 startDocument(); 378 break; 379 case QXmlStreamReader::EndDocument: 380 endDocument(); 381 break; 382 case QXmlStreamReader::StartElement: 383 parseStartElement(); 384 break; 385 case QXmlStreamReader::EndElement: 386 parseEndElement(); 387 break; 388 case QXmlStreamReader::Characters: { 389 if (m_stream.isCDATA()) { 390 //cdata 391 parseCdata(); 392 } else { 393 //characters 394 parseCharacters(); 395 } 396 break; 397 } 398 case QXmlStreamReader::Comment: 399 parseComment(); 400 break; 401 case QXmlStreamReader::DTD: 402 //qDebug()<<"------------- DTD"; 403 parseDtd(); 404 break; 405 case QXmlStreamReader::EntityReference: { 406 //qDebug()<<"---------- ENTITY = "<<m_stream.name().toString() 407 // <<", t = "<<m_stream.text().toString(); 408 if (isXHTMLDocument()) { 409 QString entity = m_stream.name().toString(); 410 if (!m_leafTextNode) 411 enterText(); 412 // qDebug()<<" ------- adding entity "<<str; 413 m_leafTextNode->appendData(decodeNamedEntity(entity), IGNORE_EXCEPTION); 414 } 415 break; 416 } 417 case QXmlStreamReader::ProcessingInstruction: 418 parseProcessingInstruction(); 419 break; 420 default: { 421 if (m_stream.error() != QXmlStreamReader::PrematureEndOfDocumentError) { 422 XMLErrors::ErrorType type = (m_stream.error() == QXmlStreamReader::NotWellFormedError) ? 423 XMLErrors::fatal : XMLErrors::warning; 424 handleError(type, qPrintable(m_stream.errorString()), textPosition()); 425 } 426 break; 427 } 428 } 429 } 430} 431 432void XMLDocumentParser::startDocument() 433{ 434 initializeParserContext(); 435 436 if (!m_parsingFragment) { 437 document()->setXMLStandalone(m_stream.isStandaloneDocument(), IGNORE_EXCEPTION); 438 439 QStringRef version = m_stream.documentVersion(); 440 if (!version.isEmpty()) 441 document()->setXMLVersion(version, IGNORE_EXCEPTION); 442 QStringRef encoding = m_stream.documentEncoding(); 443 if (!encoding.isEmpty()) 444 document()->setXMLEncoding(encoding); 445 document()->setHasXMLDeclaration(!version.isEmpty()); 446 } 447} 448 449void XMLDocumentParser::parseStartElement() 450{ 451 if (!m_sawFirstElement && m_parsingFragment) { 452 // skip dummy element for fragments 453 m_sawFirstElement = true; 454 return; 455 } 456 457 exitText(); 458 459 String localName = m_stream.name(); 460 String uri = m_stream.namespaceUri(); 461 String prefix = prefixFromQName(m_stream.qualifiedName().toString()); 462 463 if (m_parsingFragment && uri.isNull()) { 464 Q_ASSERT(prefix.isNull()); 465 uri = m_defaultNamespaceURI; 466 } 467 468 QualifiedName qName(prefix, localName, uri); 469 RefPtr<Element> newElement = document()->createElement(qName, true); 470 if (!newElement) { 471 stopParsing(); 472 return; 473 } 474 475 bool isFirstElement = !m_sawFirstElement; 476 m_sawFirstElement = true; 477 478 Vector<Attribute> prefixedAttributes; 479 ExceptionCode ec = 0; 480 handleNamespaceAttributes(prefixedAttributes, m_stream.namespaceDeclarations(), ec); 481 if (ec) { 482 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy()); 483 stopParsing(); 484 return; 485 } 486 487 handleElementAttributes(prefixedAttributes, m_stream.attributes(), ec); 488 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy()); 489 if (ec) { 490 stopParsing(); 491 return; 492 } 493 494 ScriptElement* scriptElement = toScriptElementIfPossible(newElement.get()); 495 if (scriptElement) 496 m_scriptStartPosition = textPosition(); 497 498 m_currentNode->parserAppendChild(newElement.get()); 499 500 pushCurrentNode(newElement.get()); 501 if (m_view && !newElement->attached()) 502 newElement->attach(); 503 504 if (newElement->hasTagName(HTMLNames::htmlTag)) 505 static_cast<HTMLHtmlElement*>(newElement.get())->insertedByParser(); 506 507 if (isFirstElement && document()->frame()) 508 document()->frame()->loader()->dispatchDocumentElementAvailable(); 509} 510 511void XMLDocumentParser::parseEndElement() 512{ 513 exitText(); 514 515 RefPtr<ContainerNode> n = m_currentNode; 516 n->finishParsingChildren(); 517 518 if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptElementIfPossible(toElement(n.get()))) { 519 popCurrentNode(); 520 n->remove(IGNORE_EXCEPTION); 521 return; 522 } 523 524 if (!n->isElementNode() || !m_view) { 525 if (!m_currentNodeStack.isEmpty()) 526 popCurrentNode(); 527 return; 528 } 529 530 Element* element = toElement(n.get()); 531 532 // The element's parent may have already been removed from document. 533 // Parsing continues in this case, but scripts aren't executed. 534 if (!element->inDocument()) { 535 popCurrentNode(); 536 return; 537 } 538 539 ScriptElement* scriptElement = toScriptElementIfPossible(element); 540 if (!scriptElement) { 541 popCurrentNode(); 542 return; 543 } 544 545 // don't load external scripts for standalone documents (for now) 546 ASSERT(!m_pendingScript); 547 m_requestingScript = true; 548 549 if (scriptElement->prepareScript(m_scriptStartPosition, ScriptElement::AllowLegacyTypeInTypeAttribute)) { 550 if (scriptElement->readyToBeParserExecuted()) 551 scriptElement->executeScript(ScriptSourceCode(scriptElement->scriptContent(), document()->url(), m_scriptStartPosition)); 552 else if (scriptElement->willBeParserExecuted()) { 553 m_pendingScript = scriptElement->cachedScript(); 554 m_scriptElement = element; 555 m_pendingScript->addClient(this); 556 557 // m_pendingScript will be 0 if script was already loaded and addClient() executed it. 558 if (m_pendingScript) 559 pauseParsing(); 560 } else 561 m_scriptElement = 0; 562 } 563 m_requestingScript = false; 564 popCurrentNode(); 565} 566 567void XMLDocumentParser::parseCharacters() 568{ 569 if (!m_leafTextNode) 570 enterText(); 571 m_leafTextNode->appendData(m_stream.text(), IGNORE_EXCEPTION); 572} 573 574void XMLDocumentParser::parseProcessingInstruction() 575{ 576 exitText(); 577 578 // ### handle exceptions 579 int exception = 0; 580 RefPtr<ProcessingInstruction> pi = document()->createProcessingInstruction( 581 m_stream.processingInstructionTarget(), 582 m_stream.processingInstructionData(), exception); 583 if (exception) 584 return; 585 586 pi->setCreatedByParser(true); 587 588 m_currentNode->parserAppendChild(pi.get()); 589 if (m_view && !pi->attached()) 590 pi->attach(); 591 592 pi->finishParsingChildren(); 593 594 if (pi->isCSS()) 595 m_sawCSS = true; 596#if ENABLE(XSLT) 597 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL(); 598 if (m_sawXSLTransform && !document()->transformSourceDocument()) 599 stopParsing(); 600#endif 601} 602 603void XMLDocumentParser::parseCdata() 604{ 605 exitText(); 606 607 RefPtr<CDATASection> newNode = CDATASection::create(document(), m_stream.text()); 608 609 m_currentNode->parserAppendChild(newNode.get()); 610 if (m_view && !newNode->attached()) 611 newNode->attach(); 612} 613 614void XMLDocumentParser::parseComment() 615{ 616 exitText(); 617 618 RefPtr<Comment> newNode = Comment::create(document(), m_stream.text()); 619 620 m_currentNode->parserAppendChild(newNode.get()); 621 if (m_view && !newNode->attached()) 622 newNode->attach(); 623} 624 625void XMLDocumentParser::endDocument() 626{ 627} 628 629bool XMLDocumentParser::hasError() const 630{ 631 return m_stream.hasError(); 632} 633 634void XMLDocumentParser::parseDtd() 635{ 636 QStringRef name = m_stream.dtdName(); 637 QStringRef publicId = m_stream.dtdPublicId(); 638 QStringRef systemId = m_stream.dtdSystemId(); 639 640 //qDebug() << dtd << name << publicId << systemId; 641 if ((publicId == QLatin1String("-//W3C//DTD XHTML 1.0 Transitional//EN")) 642 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.1//EN")) 643 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.0 Strict//EN")) 644 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.0 Frameset//EN")) 645 || (publicId == QLatin1String("-//W3C//DTD XHTML Basic 1.0//EN")) 646 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")) 647 || (publicId == QLatin1String("-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")) 648 || (publicId == QLatin1String("-//WAPFORUM//DTD XHTML Mobile 1.0//EN")) 649 || (publicId == QLatin1String("-//WAPFORUM//DTD XHTML Mobile 1.1//EN")) 650 || (publicId == QLatin1String("-//WAPFORUM//DTD XHTML Mobile 1.2//EN")) 651 ) 652 setIsXHTMLDocument(true); // controls if we replace entities or not. 653 if (!m_parsingFragment) 654 document()->parserAppendChild(DocumentType::create(document(), name, publicId, systemId)); 655 656} 657} 658