/*
 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
 * Copyright (C) 2011 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "HTMLTreeBuilder.h"

#include "DocumentFragment.h"
#include "HTMLDocument.h"
#include "HTMLDocumentParser.h"
#include "HTMLFormElement.h"
#include "HTMLOptGroupElement.h"
#include "HTMLOptionElement.h"
#include "HTMLParserIdioms.h"
#include "HTMLTableElement.h"
#include "HTMLTemplateElement.h"
#include "LocalizedStrings.h"
#include "NotImplemented.h"
#include "XLinkNames.h"
#include "XMLNSNames.h"
#include "XMLNames.h"
#include <wtf/MainThread.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/unicode/CharacterNames.h>

#if ENABLE(TELEPHONE_NUMBER_DETECTION)
#include "TelephoneNumberDetector.h"
#endif

namespace WebCore {

using namespace HTMLNames;

namespace {

// True when the character satisfies isHTMLSpace() or equals replacementCharacter.
inline bool isHTMLSpaceOrReplacementCharacter(UChar character)
{
    return isHTMLSpace(character) || character == replacementCharacter;
}

}

// Sentinel position used for m_scriptToProcessStartPosition when no script
// position is pending (both constructors start with it and
// takeScriptToProcess() resets to it).
static TextPosition uninitializedPositionValue1()
{
    return TextPosition(OrdinalNumber::fromOneBasedInt(-1), OrdinalNumber::first());
}

// True when every character of the string satisfies isHTMLSpace().
static inline bool isAllWhitespace(const String& string)
{
    return string.isAllSpecialCharacters<isHTMLSpace>();
}

// True when every character of the string is an HTML space or the replacement character.
static inline bool isAllWhitespaceOrReplacementCharacters(const String& string)
{
    return string.isAllSpecialCharacters<isHTMLSpaceOrReplacementCharacter>();
}

// Matches h1 through h6.
static bool isNumberedHeaderTag(const AtomicString& tagName)
{
    return tagName == h1Tag
        || tagName == h2Tag
        || tagName == h3Tag
        || tagName == h4Tag
        || tagName == h5Tag
        || tagName == h6Tag;
}

// Matches caption, col and colgroup.
static bool isCaptionColOrColgroupTag(const AtomicString& tagName)
{
    return tagName == captionTag
        || tagName == colTag
        || tagName == colgroupTag;
}

// Matches th and td.
static bool isTableCellContextTag(const AtomicString& tagName)
{
    return tagName == thTag || tagName == tdTag;
}

// Matches tbody, tfoot and thead.
static bool isTableBodyContextTag(const AtomicString& tagName)
{
    return tagName == tbodyTag
        || tagName == tfootTag
        || tagName == theadTag;
}

// Matches the formatting tags other than <a> and <nobr>.
static bool isNonAnchorNonNobrFormattingTag(const AtomicString& tagName)
{
    return tagName == bTag
        || tagName == bigTag
        || tagName == codeTag
        || tagName == emTag
        || tagName == fontTag
        || tagName == iTag
        || tagName == sTag
        || tagName == smallTag
        || tagName == strikeTag
        || tagName == strongTag
        || tagName == ttTag
        || tagName == uTag;
}

// Matches <nobr> plus everything accepted by isNonAnchorNonNobrFormattingTag().
static bool isNonAnchorFormattingTag(const AtomicString& tagName)
{
    return tagName == nobrTag
        || isNonAnchorNonNobrFormattingTag(tagName);
}

// http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#formatting
static bool isFormattingTag(const AtomicString& tagName)
{
    return tagName == aTag || isNonAnchorFormattingTag(tagName);
}

// Consumable cursor over the text of a character token (or of a String).
// Callers remove text from the front with the skip*/take*/give* members.
// Construction asserts that the text is non-empty and destruction asserts
// that all of it has been consumed.
class HTMLTreeBuilder::ExternalCharacterTokenBuffer {
    WTF_MAKE_NONCOPYABLE(ExternalCharacterTokenBuffer);
public:
    // Views the characters of the token; the token must carry at least one character.
    explicit ExternalCharacterTokenBuffer(AtomicHTMLToken* token)
        : m_text(token->characters(), token->charactersLength())
        , m_isAll8BitData(token->isAll8BitData())
    {
        ASSERT(!isEmpty());
    }

    // Views the given string; the string must be non-empty.
    explicit ExternalCharacterTokenBuffer(const String& string)
        : m_text(string)
        , m_isAll8BitData(m_text.length() && m_text.is8Bit())
    {
        ASSERT(!isEmpty());
    }

    ~ExternalCharacterTokenBuffer()
    {
        ASSERT(isEmpty());
    }

    bool isEmpty() const { return m_text.isEmpty(); }

    bool isAll8BitData() const { return m_isAll8BitData; }

    // Drops the first character if, and only if, it is '\n'.
    void skipAtMostOneLeadingNewline()
    {
        ASSERT(!isEmpty());
        if (m_text[0] == '\n')
            m_text = m_text.substring(1);
    }

    void skipLeadingWhitespace()
    {
        skipLeading<isHTMLSpace>();
    }

    String takeLeadingWhitespace()
    {
        return takeLeading<isHTMLSpace>();
    }

    void skipLeadingNonWhitespace()
    {
        skipLeading<isNotHTMLSpace>();
    }

    // Returns everything that is left and empties the buffer. When the view is
    // 16-bit but flagged as all-8-bit data, the result is built with
    // String::make8BitFrom16BitSource() instead of a plain copy.
    String takeRemaining()
    {
        String result;
        if (m_text.is8Bit() || !isAll8BitData())
            result = m_text.toString();
        else
            result = String::make8BitFrom16BitSource(m_text.characters16(), m_text.length());
        m_text = StringView();
        return result;
    }

    // Appends everything that is left to the recipient and empties the buffer.
    void giveRemainingTo(StringBuilder& recipient)
    {
        recipient.append(m_text);
        m_text = StringView();
    }

    // Consumes all remaining text and returns only the characters for which
    // isHTMLSpace() is true, in their original order.
    String takeRemainingWhitespace()
    {
        ASSERT(!isEmpty());
        Vector<LChar, 8> whitespace;
        do {
            UChar character = m_text[0];
            if (isHTMLSpace(character))
                whitespace.append(character);
            m_text = m_text.substring(1);
        } while (!m_text.isEmpty());

        // Returning the null string when there aren't any whitespace
        // characters is slightly cleaner semantically because we don't want
        // to insert a text node (as opposed to inserting an empty text node).
        if (whitespace.isEmpty())
            return String();

        return String::adopt(whitespace);
    }

private:
    // Removes leading characters for as long as the predicate accepts them,
    // stopping early if the text runs out.
    template<bool characterPredicate(UChar)>
    void skipLeading()
    {
        ASSERT(!isEmpty());
        while (characterPredicate(m_text[0])) {
            m_text = m_text.substring(1);
            if (m_text.isEmpty())
                return;
        }
    }

    // Removes and returns the leading run accepted by the predicate, or the
    // null String when the first character is not accepted.
    template<bool characterPredicate(UChar)>
    String takeLeading()
    {
        ASSERT(!isEmpty());
        StringView start = m_text;
        skipLeading<characterPredicate>();
        if (start.length() == m_text.length())
            return String();
        StringView leading = start.substring(0, start.length() - m_text.length());
        if (leading.is8Bit() || !isAll8BitData())
            return leading.toString();
        return String::make8BitFrom16BitSource(leading.characters16(), leading.length());
    }

    StringView m_text;
    bool m_isAll8BitData;
};


// Builds a tree builder that parses into a whole document. Parsing starts in
// the Initial insertion mode with frameset-ok set.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser& parser, HTMLDocument& document, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
    : m_framesetOk(true)
#ifndef NDEBUG
    , m_isAttached(true)
#endif
    , m_tree(document, parserContentPolicy, options.maximumDOMTreeDepth)
    , m_insertionMode(InsertionMode::Initial)
    , m_originalInsertionMode(InsertionMode::Initial)
    , m_shouldSkipLeadingNewline(false)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_options(options)
{
}

// Builds a tree builder that parses into a DocumentFragment in the context of
// contextElement. When a context element is supplied, the fragment is pushed
// as the root of the open-elements stack, the insertion mode is reset for that
// context and the form element pointer is initialised from the context.
// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser& parser, DocumentFragment& fragment, Element* contextElement, ParserContentPolicy parserContentPolicy, const HTMLParserOptions& options)
    : m_framesetOk(true)
#ifndef NDEBUG
    , m_isAttached(true)
#endif
    , m_fragmentContext(fragment, contextElement)
    , m_tree(fragment, parserContentPolicy, options.maximumDOMTreeDepth)
    , m_insertionMode(InsertionMode::Initial)
    , m_originalInsertionMode(InsertionMode::Initial)
    , m_shouldSkipLeadingNewline(false)
    , m_parser(parser)
    , m_scriptToProcessStartPosition(uninitializedPositionValue1())
    , m_options(options)
{
    ASSERT(isMainThread());
    // FIXME: This assertion will become invalid if <http://webkit.org/b/60316> is fixed.
    ASSERT(contextElement);
    if (contextElement) {
        // Steps 4.2-4.6 of the HTML5 Fragment Case parsing algorithm:
        // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-end.html#fragment-case
        // For efficiency, we skip step 4.2 ("Let root be a new html element with no attributes")
        // and instead use the DocumentFragment as a root node.
        m_tree.openElements()->pushRootNode(HTMLStackItem::create(&fragment, HTMLStackItem::ItemForDocumentFragmentNode));

#if ENABLE(TEMPLATE_ELEMENT)
        if (contextElement->hasTagName(templateTag))
            m_templateInsertionModes.append(InsertionMode::TemplateContents);
#endif

        resetInsertionModeAppropriately();
        // Use the context element itself when it is a form, otherwise its closest form ancestor.
        m_tree.setForm(!contextElement || isHTMLFormElement(contextElement) ? toHTMLFormElement(contextElement) : HTMLFormElement::findClosestFormAncestor(*contextElement));
    }
}

HTMLTreeBuilder::~HTMLTreeBuilder()
{
}

// Detaches the construction site; in debug builds also records that the
// builder is no longer attached.
void HTMLTreeBuilder::detach()
{
#ifndef NDEBUG
    // This call makes little sense in fragment mode, but for consistency
    // DocumentParser expects detach() to always be called before it's destroyed.
    m_isAttached = false;
#endif
    // HTMLConstructionSite might be on the callstack when detach() is called
    // otherwise we'd just call m_tree.clear() here instead.
    m_tree.detach();
}

// Empty context: no fragment and no context element.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext()
    : m_fragment(0)
    , m_contextElement(0)
{
}

// Context for fragment parsing; the fragment must not have children yet.
HTMLTreeBuilder::FragmentParsingContext::FragmentParsingContext(DocumentFragment& fragment, Element* contextElement)
    : m_fragment(&fragment)
    , m_contextElement(contextElement)
{
    ASSERT(!fragment.hasChildNodes());
}

HTMLTreeBuilder::FragmentParsingContext::~FragmentParsingContext()
{
}

// Hands the pending script element to the caller and reports its start
// position through scriptStartPosition. The stored position is reset to the
// uninitialized sentinel. A script must be pending (asserted).
PassRefPtr<Element> HTMLTreeBuilder::takeScriptToProcess(TextPosition& scriptStartPosition)
{
    ASSERT(m_scriptToProcess);
    // Unpause ourselves, callers may pause us again when processing the script.
    // The HTML5 spec is written as though scripts are executed inside the tree
    // builder. We pause the parser to exit the tree builder, and then resume
    // before running scripts.
    scriptStartPosition = m_scriptToProcessStartPosition;
    m_scriptToProcessStartPosition = uninitializedPositionValue1();
    return m_scriptToProcess.release();
}

// Entry point for one token: routes it to foreign-content or regular
// processing, then (when a tokenizer exists) updates the tokenizer's
// null-character-replacement and CDATA flags from the resulting tree state,
// and finally executes the construction site's queued tasks.
void HTMLTreeBuilder::constructTree(AtomicHTMLToken* token)
{
    if (shouldProcessTokenInForeignContent(token))
        processTokenInForeignContent(token);
    else
        processToken(token);

    if (m_parser.tokenizer()) {
        bool inForeignContent = !m_tree.isEmpty()
            && !m_tree.currentStackItem()->isInHTMLNamespace()
            && !HTMLElementStack::isHTMLIntegrationPoint(m_tree.currentStackItem())
            && !HTMLElementStack::isMathMLTextIntegrationPoint(m_tree.currentStackItem());

        m_parser.tokenizer()->setForceNullCharacterReplacement(m_insertionMode == InsertionMode::Text || inForeignContent);
        m_parser.tokenizer()->setShouldAllowCDATA(inForeignContent);
    }

    m_tree.executeQueuedTasks();
    // We might be detached now.
}

// Dispatches on the token type. Every type except Character clears
// m_shouldSkipLeadingNewline before its handler runs; the Character case
// passes the flag through untouched to processCharacter().
void HTMLTreeBuilder::processToken(AtomicHTMLToken* token)
{
    switch (token->type()) {
    case HTMLToken::Uninitialized:
        ASSERT_NOT_REACHED();
        break;
    case HTMLToken::DOCTYPE:
        m_shouldSkipLeadingNewline = false;
        processDoctypeToken(token);
        break;
    case HTMLToken::StartTag:
        m_shouldSkipLeadingNewline = false;
        processStartTag(token);
        break;
    case HTMLToken::EndTag:
        m_shouldSkipLeadingNewline = false;
        processEndTag(token);
        break;
    case HTMLToken::Comment:
        m_shouldSkipLeadingNewline = false;
        processComment(token);
        return;
    case HTMLToken::Character:
        processCharacter(token);
        break;
    case HTMLToken::EndOfFile:
        m_shouldSkipLeadingNewline = false;
        processEndOfFile(token);
        break;
    }
}

// A DOCTYPE is inserted only in the Initial mode. In InTableText the pending
// table text is handled first and the token is re-processed; in every other
// mode the token is a parse error.
void HTMLTreeBuilder::processDoctypeToken(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::DOCTYPE);
    if (m_insertionMode == InsertionMode::Initial) {
        m_tree.insertDoctype(token);
        setInsertionMode(InsertionMode::BeforeHTML);
        return;
    }
    if (m_insertionMode == InsertionMode::InTableText) {
        defaultForInTableText();
        processDoctypeToken(token);
        return;
    }
    parseError(token);
}

// Synthesizes a start tag token with the given name and attributes and runs it
// through processStartTag().
void HTMLTreeBuilder::processFakeStartTag(const QualifiedName& tagName, const Vector<Attribute>& attributes)
{
    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
    AtomicHTMLToken fakeToken(HTMLToken::StartTag, tagName.localName(), attributes);
    processStartTag(&fakeToken);
}

// Synthesizes an end tag token with the given name and runs it through processEndTag().
void HTMLTreeBuilder::processFakeEndTag(const AtomicString& tagName)
{
    AtomicHTMLToken fakeToken(HTMLToken::EndTag, tagName);
    processEndTag(&fakeToken);
}

// Convenience overload that uses only the local name of the qualified name.
void HTMLTreeBuilder::processFakeEndTag(const QualifiedName& tagName)
{
    // FIXME: We'll need a fancier conversion than just "localName" for SVG/MathML tags.
    processFakeEndTag(tagName.localName());
}

// Processes the given non-empty string as if it had arrived as character data.
void HTMLTreeBuilder::processFakeCharacters(const String& characters)
{
    ASSERT(!characters.isEmpty());
    ExternalCharacterTokenBuffer buffer(characters);
    processCharacterBuffer(buffer);
}

// Processes a synthetic </p> only when a p element is in button scope.
void HTMLTreeBuilder::processFakePEndTagIfPInButtonScope()
{
    if (!m_tree.openElements()->inButtonScope(pTag.localName()))
        return;
    AtomicHTMLToken endP(HTMLToken::EndTag, pTag.localName());
    processEndTag(&endP);
}

// Returns a copy of the token's attributes with any name, action and prompt
// attributes removed and a name attribute whose value is "isindex" appended.
Vector<Attribute> HTMLTreeBuilder::attributesForIsindexInput(AtomicHTMLToken* token)
{
    Vector<Attribute> attributes = token->attributes();
    // Walk backwards so removals do not shift the entries still to be visited.
    for (int i = attributes.size() - 1; i >= 0; --i) {
        const QualifiedName& name = attributes.at(i).name();
        if (name.matches(nameAttr) || name.matches(actionAttr) || name.matches(promptAttr))
            attributes.remove(i);
    }

    attributes.append(Attribute(nameAttr, isindexTag.localName()));
    return attributes;
}

// Handles an <isindex> start tag in the "in body" mode by expanding it into a
// synthetic form / hr / label / input / hr sequence. It is always a parse
// error, and it is ignored when a form element is already set and template
// contents are not being parsed.
void HTMLTreeBuilder::processIsindexStartTagForInBody(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    ASSERT(token->name() == isindexTag);
    parseError(token);
    if (m_tree.form() && !isParsingTemplateContents())
        return;
    notImplemented(); // Acknowledge self-closing flag
    processFakeStartTag(formTag);
    Attribute* actionAttribute = token->getAttributeItem(actionAttr);
    if (actionAttribute)
        m_tree.form()->setAttribute(actionAttr, actionAttribute->value());
    processFakeStartTag(hrTag);
    processFakeStartTag(labelTag);
    // The label text is the prompt attribute when present, otherwise the localized default.
    Attribute* promptAttribute = token->getAttributeItem(promptAttr);
    if (promptAttribute)
        processFakeCharacters(promptAttribute->value());
    else
        processFakeCharacters(searchableIndexIntroduction());
    processFakeStartTag(inputTag, attributesForIsindexInput(token));
    notImplemented(); // This second set of characters may be needed by non-english locales.
    processFakeEndTag(labelTag);
    processFakeStartTag(hrTag);
    processFakeEndTag(formTag);
}

namespace {

// Predicate for processCloseWhenNestedTag: the item is an <li>.
bool isLi(const HTMLStackItem* item)
{
    return item->hasTagName(liTag);
}

// Predicate for processCloseWhenNestedTag: the item is a <dd> or <dt>.
bool isDdOrDt(const HTMLStackItem* item)
{
    return item->hasTagName(ddTag)
        || item->hasTagName(dtTag);
}

}

// Shared handling for start tags that implicitly close an open element of the
// same family (used for li and for dd/dt). Walks the open-elements stack from
// the top record: the first item accepted by shouldClose gets a synthetic end
// tag; the walk also stops at a special node other than address, div or p.
// Afterwards a p in button scope is closed and the element is inserted.
// Clears m_framesetOk.
template <bool shouldClose(const HTMLStackItem*)>
void HTMLTreeBuilder::processCloseWhenNestedTag(AtomicHTMLToken* token)
{
    m_framesetOk = false;
    HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
    while (1) {
        RefPtr<HTMLStackItem> item = nodeRecord->stackItem();
        if (shouldClose(item.get())) {
            ASSERT(item->isElementNode());
            processFakeEndTag(item->localName());
            break;
        }
        if (item->isSpecialNode() && !item->hasTagName(addressTag) && !item->hasTagName(divTag) && !item->hasTagName(pTag))
            break;
        nodeRecord = nodeRecord->next();
    }
    processFakePEndTagIfPInButtonScope();
    m_tree.insertHTMLElement(token);
}

// Builds a map from the lower-cased local name to the QualifiedName for every
// entry of names whose local name differs from its lower-cased form. Names
// that are already lower case are left out.
template <typename TableQualifiedName>
static HashMap<AtomicString, QualifiedName> createCaseMap(const TableQualifiedName* const names[], unsigned length)
{
    HashMap<AtomicString, QualifiedName> map;
    for (unsigned i = 0; i < length; ++i) {
        const QualifiedName& name = *names[i];
        const AtomicString& localName = name.localName();
        AtomicString loweredLocalName = localName.lower();
        if (loweredLocalName != localName)
            map.add(loweredLocalName, name);
    }
    return map;
}

// Replaces the token's name with the cased SVG tag name when the case map
// (built once from the SVG tag table) has an entry for it.
static void adjustSVGTagNameCase(AtomicHTMLToken& token)
{
    static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createCaseMap(SVGNames::getSVGTags(), SVGNames::SVGTagsCount);
    const QualifiedName& casedName = map.get().get(token.name());
    if (casedName.localName().isNull())
        return;
    token.setName(casedName.localName());
}

// Renames every attribute of the token whose local name has an entry in map.
static inline void adjustAttributes(HashMap<AtomicString, QualifiedName>& map, AtomicHTMLToken& token)
{
    for (auto& attribute : token.attributes()) {
        const QualifiedName& casedName = map.get(attribute.localName());
        if (!casedName.localName().isNull())
            attribute.parserSetName(casedName);
    }
}

// Adjusts attribute names using a case map built once per attribute table.
template<const QualifiedName* const* attributesTable(), unsigned attributesTableLength>
static void adjustAttributes(AtomicHTMLToken& token)
{
    static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createCaseMap(attributesTable(), attributesTableLength);
    adjustAttributes(map, token);
}

static inline void adjustSVGAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<SVGNames::getSVGAttrs, SVGNames::SVGAttrsCount>(token);
}

static inline void adjustMathMLAttributes(AtomicHTMLToken& token)
{
    adjustAttributes<MathMLNames::getMathMLAttrs, MathMLNames::MathMLAttrsCount>(token);
}

// Adds one "prefix:localName" entry per name, mapped to a QualifiedName that
// carries the prefix, the local name and the name's namespace URI.
static void addNamesWithPrefix(HashMap<AtomicString, QualifiedName>& map, const AtomicString& prefix, const QualifiedName* const names[], unsigned length)
{
    for (unsigned i = 0; i < length; ++i) {
        const QualifiedName& name = *names[i];
        const AtomicString& localName = name.localName();
        map.add(prefix + ':' + localName, QualifiedName(prefix, localName, name.namespaceURI()));
    }
}

// Builds the lookup used by adjustForeignAttributes(): the xlink:* and xml:*
// attributes plus "xmlns" and "xmlns:xlink".
static HashMap<AtomicString, QualifiedName> createForeignAttributesMap()
{
    HashMap<AtomicString, QualifiedName> map;

    addNamesWithPrefix(map, xlinkAtom, XLinkNames::getXLinkAttrs(), XLinkNames::XLinkAttrsCount);
    addNamesWithPrefix(map, xmlAtom, XMLNames::getXMLAttrs(), XMLNames::XMLAttrsCount);

    map.add(WTF::xmlnsAtom, XMLNSNames::xmlnsAttr);
    map.add("xmlns:xlink", QualifiedName(xmlnsAtom, xlinkAtom, XMLNSNames::xmlnsNamespaceURI));

    return map;
}

// Renames the token's attributes that appear in the foreign-attributes map (built once).
static void adjustForeignAttributes(AtomicHTMLToken& token)
{
    static NeverDestroyed<HashMap<AtomicString, QualifiedName>> map = createForeignAttributesMap();
    adjustAttributes(map, token);
}

// Handles a start tag under the "in body" rules. The checks are ordered and
// each recognised tag group returns after handling; a tag that matches none of
// them ends at the generic "reconstruct active formatting elements and insert"
// step at the bottom. Two checks deliberately do not return on their own: the
// plugin-policy check for applet/embed/object (returns only when plugin
// content is disallowed) and the image -> img rename.
void HTMLTreeBuilder::processStartTagForInBody(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    if (token->name() == htmlTag) {
        processHtmlStartTagForInBody(token);
        return;
    }
    // These tags are handled by the "in head" rules.
    if (token->name() == baseTag
        || token->name() == basefontTag
        || token->name() == bgsoundTag
        || token->name() == commandTag
        || token->name() == linkTag
        || token->name() == metaTag
        || token->name() == noframesTag
        || token->name() == scriptTag
        || token->name() == styleTag
        || token->name() == titleTag) {
        bool didProcess = processStartTagForInHead(token);
        ASSERT_UNUSED(didProcess, didProcess);
        return;
    }
    if (token->name() == bodyTag) {
        parseError(token);
        bool fragmentOrTemplateCase = !m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement();
#if ENABLE(TEMPLATE_ELEMENT)
        fragmentOrTemplateCase = fragmentOrTemplateCase || m_tree.openElements()->hasTemplateInHTMLScope();
#endif
        if (fragmentOrTemplateCase) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        m_framesetOk = false;
        m_tree.insertHTMLBodyStartTagInBody(token);
        return;
    }
    if (token->name() == framesetTag) {
        parseError(token);
        if (!m_tree.openElements()->secondElementIsHTMLBodyElement() || m_tree.openElements()->hasOnlyOneElement()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        if (!m_framesetOk)
            return;
        // Replace the body with the frameset: remove the body element, pop
        // the stack back down to the html element, then insert the frameset.
        m_tree.openElements()->bodyElement()->remove(ASSERT_NO_EXCEPTION);
        m_tree.openElements()->popUntil(m_tree.openElements()->bodyElement());
        m_tree.openElements()->popHTMLBodyElement();
        ASSERT(m_tree.openElements()->top() == m_tree.openElements()->htmlElement());
        m_tree.insertHTMLElement(token);
        setInsertionMode(InsertionMode::InFrameset);
        return;
    }
    // Block-level containers: close an open p in button scope, then insert.
    if (token->name() == addressTag
        || token->name() == articleTag
        || token->name() == asideTag
        || token->name() == blockquoteTag
        || token->name() == centerTag
        || token->name() == detailsTag
        || token->name() == dirTag
        || token->name() == divTag
        || token->name() == dlTag
        || token->name() == fieldsetTag
        || token->name() == figcaptionTag
        || token->name() == figureTag
        || token->name() == footerTag
        || token->name() == headerTag
        || token->name() == hgroupTag
        || token->name() == mainTag
        || token->name() == menuTag
        || token->name() == navTag
        || token->name() == olTag
        || token->name() == pTag
        || token->name() == sectionTag
        || token->name() == summaryTag
        || token->name() == ulTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        return;
    }
    if (isNumberedHeaderTag(token->name())) {
        processFakePEndTagIfPInButtonScope();
        // A heading directly inside another heading is a parse error; the outer one is popped.
        if (m_tree.currentStackItem()->isNumberedHeaderElement()) {
            parseError(token);
            m_tree.openElements()->pop();
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token->name() == preTag || token->name() == listingTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        m_shouldSkipLeadingNewline = true;
        m_framesetOk = false;
        return;
    }
    if (token->name() == formTag) {
        if (m_tree.form() && !isParsingTemplateContents()) {
            parseError(token);
            return;
        }
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLFormElement(token);
        return;
    }
    if (token->name() == liTag) {
        processCloseWhenNestedTag<isLi>(token);
        return;
    }
    if (token->name() == ddTag || token->name() == dtTag) {
        processCloseWhenNestedTag<isDdOrDt>(token);
        return;
    }
    if (token->name() == plaintextTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertHTMLElement(token);
        if (m_parser.tokenizer())
            m_parser.tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
        return;
    }
    if (token->name() == buttonTag) {
        if (m_tree.openElements()->inScope(buttonTag)) {
            parseError(token);
            processFakeEndTag(buttonTag);
            processStartTag(token); // FIXME: Could we just fall through here?
            return;
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token->name() == aTag) {
        // An <a> that is still active gets closed (parse error) before the new one opens.
        Element* activeATag = m_tree.activeFormattingElements()->closestElementInScopeWithName(aTag.localName());
        if (activeATag) {
            parseError(token);
            processFakeEndTag(aTag);
            m_tree.activeFormattingElements()->remove(activeATag);
            if (m_tree.openElements()->contains(activeATag))
                m_tree.openElements()->remove(activeATag);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    if (isNonAnchorNonNobrFormattingTag(token->name())) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertFormattingElement(token);
        return;
    }
    if (token->name() == nobrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        if (m_tree.openElements()->inScope(nobrTag)) {
            parseError(token);
            processFakeEndTag(nobrTag);
            m_tree.reconstructTheActiveFormattingElements();
        }
        m_tree.insertFormattingElement(token);
        return;
    }
    // Plugin elements are dropped when the content policy disallows plugin
    // content; otherwise control continues to the checks below.
    if (token->name() == appletTag
        || token->name() == embedTag
        || token->name() == objectTag) {
        if (!pluginContentIsAllowed(m_tree.parserContentPolicy()))
            return;
    }
    if (token->name() == appletTag
        || token->name() == marqueeTag
        || token->name() == objectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_tree.activeFormattingElements()->appendMarker();
        m_framesetOk = false;
        return;
    }
    if (token->name() == tableTag) {
        if (!m_tree.inQuirksMode() && m_tree.openElements()->inButtonScope(pTag))
            processFakeEndTag(pTag);
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        setInsertionMode(InsertionMode::InTable);
        return;
    }
    if (token->name() == imageTag) {
        parseError(token);
        // Apparently we're not supposed to ask.
        token->setName(imgTag.localName());
        // Note the fall through to the imgTag handling below!
    }
    if (token->name() == areaTag
        || token->name() == brTag
        || token->name() == embedTag
        || token->name() == imgTag
        || token->name() == keygenTag
        || token->name() == wbrTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token->name() == inputTag) {
        // The type attribute is looked up before the element is inserted;
        // only a non-hidden input clears frameset-ok.
        Attribute* typeAttribute = token->getAttributeItem(typeAttr);
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertSelfClosingHTMLElement(token);
        if (!typeAttribute || !equalIgnoringCase(typeAttribute->value(), "hidden"))
            m_framesetOk = false;
        return;
    }
    if (token->name() == paramTag
        || token->name() == sourceTag
        || token->name() == trackTag) {
        m_tree.insertSelfClosingHTMLElement(token);
        return;
    }
    if (token->name() == hrTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.insertSelfClosingHTMLElement(token);
        m_framesetOk = false;
        return;
    }
    if (token->name() == isindexTag) {
        processIsindexStartTagForInBody(token);
        return;
    }
    if (token->name() == textareaTag) {
        m_tree.insertHTMLElement(token);
        m_shouldSkipLeadingNewline = true;
        if (m_parser.tokenizer())
            m_parser.tokenizer()->setState(HTMLTokenizer::RCDATAState);
        // Remember the current mode before switching to the Text mode.
        m_originalInsertionMode = m_insertionMode;
        m_framesetOk = false;
        setInsertionMode(InsertionMode::Text);
        return;
    }
    if (token->name() == xmpTag) {
        processFakePEndTagIfPInButtonScope();
        m_tree.reconstructTheActiveFormattingElements();
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    if (token->name() == iframeTag) {
        m_framesetOk = false;
        processGenericRawTextStartTag(token);
        return;
    }
    // noembed / noscript are treated as raw text only when the matching option is enabled.
    if (token->name() == noembedTag && m_options.pluginsEnabled) {
        processGenericRawTextStartTag(token);
        return;
    }
    if (token->name() == noscriptTag && m_options.scriptEnabled) {
        processGenericRawTextStartTag(token);
        return;
    }
    if (token->name() == selectTag) {
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        m_framesetOk = false;
        // A select opened from one of the table-related modes uses InSelectInTable.
        if (m_insertionMode == InsertionMode::InTable
            || m_insertionMode == InsertionMode::InCaption
            || m_insertionMode == InsertionMode::InColumnGroup
            || m_insertionMode == InsertionMode::InTableBody
            || m_insertionMode == InsertionMode::InRow
            || m_insertionMode == InsertionMode::InCell)
            setInsertionMode(InsertionMode::InSelectInTable);
        else
            setInsertionMode(InsertionMode::InSelect);
        return;
    }
    if (token->name() == optgroupTag || token->name() == optionTag) {
        // Close a currently open option first.
        if (isHTMLOptionElement(m_tree.currentStackItem()->node())) {
            AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName());
            processEndTag(&endOption);
        }
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token->name() == rbTag || token->name() == rpTag || token->name() == rtcTag) {
        if (m_tree.openElements()->inScope(rubyTag.localName())) {
            m_tree.generateImpliedEndTags();
            if (!m_tree.currentStackItem()->hasTagName(rubyTag))
                parseError(token);
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token->name() == rtTag) {
        if (m_tree.openElements()->inScope(rubyTag.localName())) {
            m_tree.generateImpliedEndTagsWithExclusion(rtcTag.localName());
            if (!m_tree.currentStackItem()->hasTagName(rubyTag) && !m_tree.currentStackItem()->hasTagName(rtcTag))
                parseError(token);
        }
        m_tree.insertHTMLElement(token);
        return;
    }
    if (token->name() == MathMLNames::mathTag.localName()) {
        m_tree.reconstructTheActiveFormattingElements();
        adjustMathMLAttributes(*token);
        adjustForeignAttributes(*token);
        m_tree.insertForeignElement(token, MathMLNames::mathmlNamespaceURI);
        return;
    }
    if (token->name() == SVGNames::svgTag.localName()) {
        m_tree.reconstructTheActiveFormattingElements();
        adjustSVGAttributes(*token);
        adjustForeignAttributes(*token);
        m_tree.insertForeignElement(token, SVGNames::svgNamespaceURI);
        return;
    }
    // These tags are a parse error here and are ignored.
    if (isCaptionColOrColgroupTag(token->name())
        || token->name() == frameTag
        || token->name() == headTag
        || isTableBodyContextTag(token->name())
        || isTableCellContextTag(token->name())
        || token->name() == trTag) {
        parseError(token);
        return;
    }
#if ENABLE(TEMPLATE_ELEMENT)
    if (token->name() == templateTag) {
        processTemplateStartTag(token);
        return;
    }
#endif
    // Any other start tag.
    m_tree.reconstructTheActiveFormattingElements();
    m_tree.insertHTMLElement(token);
}

#if ENABLE(TEMPLATE_ELEMENT)
// Opens a <template>: appends a marker to the active formatting elements,
// inserts the element, pushes TemplateContents onto the template insertion
// mode stack and switches to that mode.
void HTMLTreeBuilder::processTemplateStartTag(AtomicHTMLToken* token)
{
    m_tree.activeFormattingElements()->appendMarker();
    m_tree.insertHTMLElement(token);
    m_templateInsertionModes.append(InsertionMode::TemplateContents);
    setInsertionMode(InsertionMode::TemplateContents);
}

// Closes a <template>. Returns false (after reporting a parse error) when no
// template is in HTML scope; otherwise pops up to and including the template,
// clears the active formatting elements to the last marker, pops the template
// insertion mode, resets the insertion mode and returns true.
bool HTMLTreeBuilder::processTemplateEndTag(AtomicHTMLToken* token)
{
    ASSERT(token->name() == templateTag.localName());
    if (!m_tree.openElements()->hasTemplateInHTMLScope()) {
        ASSERT(m_templateInsertionModes.isEmpty() || (m_templateInsertionModes.size() == 1 && m_fragmentContext.contextElement()->hasTagName(templateTag)));
        parseError(token);
        return false;
    }
    m_tree.generateImpliedEndTags();
    if (!m_tree.currentStackItem()->hasTagName(templateTag))
        parseError(token);
    m_tree.openElements()->popUntilPopped(templateTag);
    m_tree.activeFormattingElements()->clearToLastMarker();
    m_templateInsertionModes.removeLast();
    resetInsertionModeAppropriately();
    return true;
}

// End-of-file inside template contents: processes a synthetic </template>
// and, if that succeeded, re-processes the EOF token. Returns whether the
// template end tag was processed.
bool HTMLTreeBuilder::processEndOfFileForInTemplateContents(AtomicHTMLToken* token)
{
    AtomicHTMLToken endTemplate(HTMLToken::EndTag, templateTag.localName());
    if (!processTemplateEndTag(&endTemplate))
        return false;

    processEndOfFile(token);
    return true;
}
#endif

// Acts as </colgroup> in the InColumnGroup mode. Returns false without
// changing anything when the current node is the root node (or, with template
// support, a template element); otherwise pops the current element, switches
// to InTable and returns true.
bool HTMLTreeBuilder::processColgroupEndTagForInColumnGroup()
{
    bool ignoreFakeEndTag = m_tree.currentIsRootNode();
#if ENABLE(TEMPLATE_ELEMENT)
    ignoreFakeEndTag = ignoreFakeEndTag || m_tree.currentNode()->hasTagName(templateTag);
#endif

    if (ignoreFakeEndTag) {
        ASSERT(isParsingFragmentOrTemplateContents());
        // FIXME: parse error
        return false;
    }
    m_tree.openElements()->pop();
    setInsertionMode(InsertionMode::InTable);
    return true;
}

// Closes the open cell by processing a synthetic </td> or </th>, whichever is
// in table scope. Note that the td path returns early, so the trailing
// insertion-mode assertion is only evaluated on the th path.
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#close-the-cell
void HTMLTreeBuilder::closeTheCell()
{
    ASSERT(insertionMode() == InsertionMode::InCell);
    if (m_tree.openElements()->inTableScope(tdTag)) {
        ASSERT(!m_tree.openElements()->inTableScope(thTag));
        processFakeEndTag(tdTag);
        return;
    }
    ASSERT(m_tree.openElements()->inTableScope(thTag));
    processFakeEndTag(thTag);
    ASSERT(insertionMode() == InsertionMode::InRow);
}

// Handles a start tag under the "in table" rules. Tags that are not
// recognised here are a parse error and are processed with the "in body"
// rules while a RedirectToFosterParentGuard is alive.
void HTMLTreeBuilder::processStartTagForInTable(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    if (token->name() == captionTag) {
        m_tree.openElements()->popUntilTableScopeMarker();
        m_tree.activeFormattingElements()->appendMarker();
        m_tree.insertHTMLElement(token);
        setInsertionMode(InsertionMode::InCaption);
        return;
    }
    if (token->name() == colgroupTag) {
        m_tree.openElements()->popUntilTableScopeMarker();
        m_tree.insertHTMLElement(token);
        setInsertionMode(InsertionMode::InColumnGroup);
        return;
    }
    if (token->name() == colTag) {
        // A bare <col> implies a <colgroup>; open one and re-process the token.
        processFakeStartTag(colgroupTag);
        ASSERT(insertionMode() == InsertionMode::InColumnGroup);
        processStartTag(token);
        return;
    }
    if (isTableBodyContextTag(token->name())) {
        m_tree.openElements()->popUntilTableScopeMarker();
        m_tree.insertHTMLElement(token);
        setInsertionMode(InsertionMode::InTableBody);
        return;
    }
    if (isTableCellContextTag(token->name())
        || token->name() == trTag) {
        // A bare cell or row implies a <tbody>; open one and re-process the token.
        processFakeStartTag(tbodyTag);
        ASSERT(insertionMode() == InsertionMode::InTableBody);
        processStartTag(token);
        return;
    }
    if (token->name() == tableTag) {
        parseError(token);
        // Close the current table first; when that is not possible the token is dropped.
        if (!processTableEndTagForInTable()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        processStartTag(token);
        return;
    }
    if (token->name() == styleTag || token->name() == scriptTag) {
        processStartTagForInHead(token);
        return;
    }
    if (token->name() == inputTag) {
        Attribute* typeAttribute = token->getAttributeItem(typeAttr);
        if (typeAttribute && equalIgnoringCase(typeAttribute->value(), "hidden")) {
            parseError(token);
            m_tree.insertSelfClosingHTMLElement(token);
            return;
        }
        // Fall through to "anything else" case.
    }
    if (token->name() == formTag) {
        parseError(token);
        if (m_tree.form() && !isParsingTemplateContents())
            return;
        // The form element is inserted and then immediately popped off the stack.
        m_tree.insertHTMLFormElement(token, true);
        m_tree.openElements()->pop();
        return;
    }
#if ENABLE(TEMPLATE_ELEMENT)
    if (token->name() == templateTag) {
        processTemplateStartTag(token);
        return;
    }
#endif
    parseError(token);
    HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
    processStartTagForInBody(token);
}

// Handles a start tag according to the current insertion mode. The early
// modes (Initial through InHead) handle the tags they recognise and otherwise
// apply their default action and deliberately fall through to the next mode.
void HTMLTreeBuilder::processStartTag(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    switch (insertionMode()) {
    case InsertionMode::Initial:
        ASSERT(insertionMode() == InsertionMode::Initial);
        defaultForInitial();
        FALLTHROUGH;
    case InsertionMode::BeforeHTML:
        ASSERT(insertionMode() == InsertionMode::BeforeHTML);
        if (token->name() == htmlTag) {
            m_tree.insertHTMLHtmlStartTagBeforeHTML(token);
            setInsertionMode(InsertionMode::BeforeHead);
            return;
        }
        defaultForBeforeHTML();
        FALLTHROUGH;
    case InsertionMode::BeforeHead:
        ASSERT(insertionMode() == InsertionMode::BeforeHead);
        if (token->name() == htmlTag) {
            processHtmlStartTagForInBody(token);
            return;
        }
        if (token->name() == headTag) {
            m_tree.insertHTMLHeadElement(token);
            setInsertionMode(InsertionMode::InHead);
            return;
        }
        defaultForBeforeHead();
        FALLTHROUGH;
    case InsertionMode::InHead:
        ASSERT(insertionMode() == InsertionMode::InHead);
        if (processStartTagForInHead(token))
            return;
        defaultForInHead();
        FALLTHROUGH;
    case InsertionMode::AfterHead:
        ASSERT(insertionMode() == InsertionMode::AfterHead);
        if (token->name() == htmlTag) {
            processHtmlStartTagForInBody(token);
            return;
        }
        if (token->name() == bodyTag) {
            m_framesetOk = false;
            m_tree.insertHTMLBodyElement(token);
setInsertionMode(InsertionMode::InBody); 1121 return; 1122 } 1123 if (token->name() == framesetTag) { 1124 m_tree.insertHTMLElement(token); 1125 setInsertionMode(InsertionMode::InFrameset); 1126 return; 1127 } 1128 if (token->name() == baseTag 1129 || token->name() == basefontTag 1130 || token->name() == bgsoundTag 1131 || token->name() == linkTag 1132 || token->name() == metaTag 1133 || token->name() == noframesTag 1134 || token->name() == scriptTag 1135 || token->name() == styleTag 1136#if ENABLE(TEMPLATE_ELEMENT) 1137 || token->name() == templateTag 1138#endif 1139 || token->name() == titleTag) { 1140 parseError(token); 1141 ASSERT(m_tree.head()); 1142 m_tree.openElements()->pushHTMLHeadElement(m_tree.headStackItem()); 1143 processStartTagForInHead(token); 1144 m_tree.openElements()->removeHTMLHeadElement(m_tree.head()); 1145 return; 1146 } 1147 if (token->name() == headTag) { 1148 parseError(token); 1149 return; 1150 } 1151 defaultForAfterHead(); 1152 FALLTHROUGH; 1153 case InsertionMode::InBody: 1154 ASSERT(insertionMode() == InsertionMode::InBody); 1155 processStartTagForInBody(token); 1156 break; 1157 case InsertionMode::InTable: 1158 ASSERT(insertionMode() == InsertionMode::InTable); 1159 processStartTagForInTable(token); 1160 break; 1161 case InsertionMode::InCaption: 1162 ASSERT(insertionMode() == InsertionMode::InCaption); 1163 if (isCaptionColOrColgroupTag(token->name()) 1164 || isTableBodyContextTag(token->name()) 1165 || isTableCellContextTag(token->name()) 1166 || token->name() == trTag) { 1167 parseError(token); 1168 if (!processCaptionEndTagForInCaption()) { 1169 ASSERT(isParsingFragment()); 1170 return; 1171 } 1172 processStartTag(token); 1173 return; 1174 } 1175 processStartTagForInBody(token); 1176 break; 1177 case InsertionMode::InColumnGroup: 1178 ASSERT(insertionMode() == InsertionMode::InColumnGroup); 1179 if (token->name() == htmlTag) { 1180 processHtmlStartTagForInBody(token); 1181 return; 1182 } 1183 if (token->name() == colTag) { 1184 
m_tree.insertSelfClosingHTMLElement(token); 1185 return; 1186 } 1187#if ENABLE(TEMPLATE_ELEMENT) 1188 if (token->name() == templateTag) { 1189 processTemplateStartTag(token); 1190 return; 1191 } 1192#endif 1193 if (!processColgroupEndTagForInColumnGroup()) { 1194 ASSERT(isParsingFragmentOrTemplateContents()); 1195 return; 1196 } 1197 processStartTag(token); 1198 break; 1199 case InsertionMode::InTableBody: 1200 ASSERT(insertionMode() == InsertionMode::InTableBody); 1201 if (token->name() == trTag) { 1202 m_tree.openElements()->popUntilTableBodyScopeMarker(); // How is there ever anything to pop? 1203 m_tree.insertHTMLElement(token); 1204 setInsertionMode(InsertionMode::InRow); 1205 return; 1206 } 1207 if (isTableCellContextTag(token->name())) { 1208 parseError(token); 1209 processFakeStartTag(trTag); 1210 ASSERT(insertionMode() == InsertionMode::InRow); 1211 processStartTag(token); 1212 return; 1213 } 1214 if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name())) { 1215 // FIXME: This is slow. 
1216 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) { 1217 ASSERT(isParsingFragmentOrTemplateContents()); 1218 parseError(token); 1219 return; 1220 } 1221 m_tree.openElements()->popUntilTableBodyScopeMarker(); 1222 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName())); 1223 processFakeEndTag(m_tree.currentStackItem()->localName()); 1224 processStartTag(token); 1225 return; 1226 } 1227 processStartTagForInTable(token); 1228 break; 1229 case InsertionMode::InRow: 1230 ASSERT(insertionMode() == InsertionMode::InRow); 1231 if (isTableCellContextTag(token->name())) { 1232 m_tree.openElements()->popUntilTableRowScopeMarker(); 1233 m_tree.insertHTMLElement(token); 1234 setInsertionMode(InsertionMode::InCell); 1235 m_tree.activeFormattingElements()->appendMarker(); 1236 return; 1237 } 1238 if (token->name() == trTag 1239 || isCaptionColOrColgroupTag(token->name()) 1240 || isTableBodyContextTag(token->name())) { 1241 if (!processTrEndTagForInRow()) { 1242 ASSERT(isParsingFragmentOrTemplateContents()); 1243 return; 1244 } 1245 ASSERT(insertionMode() == InsertionMode::InTableBody); 1246 processStartTag(token); 1247 return; 1248 } 1249 processStartTagForInTable(token); 1250 break; 1251 case InsertionMode::InCell: 1252 ASSERT(insertionMode() == InsertionMode::InCell); 1253 if (isCaptionColOrColgroupTag(token->name()) 1254 || isTableCellContextTag(token->name()) 1255 || token->name() == trTag 1256 || isTableBodyContextTag(token->name())) { 1257 // FIXME: This could be more efficient. 
1258 if (!m_tree.openElements()->inTableScope(tdTag) && !m_tree.openElements()->inTableScope(thTag)) { 1259 ASSERT(isParsingFragment()); 1260 parseError(token); 1261 return; 1262 } 1263 closeTheCell(); 1264 processStartTag(token); 1265 return; 1266 } 1267 processStartTagForInBody(token); 1268 break; 1269 case InsertionMode::AfterBody: 1270 case InsertionMode::AfterAfterBody: 1271 ASSERT(insertionMode() == InsertionMode::AfterBody || insertionMode() == InsertionMode::AfterAfterBody); 1272 if (token->name() == htmlTag) { 1273 processHtmlStartTagForInBody(token); 1274 return; 1275 } 1276 setInsertionMode(InsertionMode::InBody); 1277 processStartTag(token); 1278 break; 1279 case InsertionMode::InHeadNoscript: 1280 ASSERT(insertionMode() == InsertionMode::InHeadNoscript); 1281 if (token->name() == htmlTag) { 1282 processHtmlStartTagForInBody(token); 1283 return; 1284 } 1285 if (token->name() == basefontTag 1286 || token->name() == bgsoundTag 1287 || token->name() == linkTag 1288 || token->name() == metaTag 1289 || token->name() == noframesTag 1290 || token->name() == styleTag) { 1291 bool didProcess = processStartTagForInHead(token); 1292 ASSERT_UNUSED(didProcess, didProcess); 1293 return; 1294 } 1295 if (token->name() == htmlTag || token->name() == noscriptTag) { 1296 parseError(token); 1297 return; 1298 } 1299 defaultForInHeadNoscript(); 1300 processToken(token); 1301 break; 1302 case InsertionMode::InFrameset: 1303 ASSERT(insertionMode() == InsertionMode::InFrameset); 1304 if (token->name() == htmlTag) { 1305 processHtmlStartTagForInBody(token); 1306 return; 1307 } 1308 if (token->name() == framesetTag) { 1309 m_tree.insertHTMLElement(token); 1310 return; 1311 } 1312 if (token->name() == frameTag) { 1313 m_tree.insertSelfClosingHTMLElement(token); 1314 return; 1315 } 1316 if (token->name() == noframesTag) { 1317 processStartTagForInHead(token); 1318 return; 1319 } 1320#if ENABLE(TEMPLATE_ELEMENT) 1321 if (token->name() == templateTag) { 1322 
processTemplateStartTag(token); 1323 return; 1324 } 1325#endif 1326 parseError(token); 1327 break; 1328 case InsertionMode::AfterFrameset: 1329 case InsertionMode::AfterAfterFrameset: 1330 ASSERT(insertionMode() == InsertionMode::AfterFrameset || insertionMode() == InsertionMode::AfterAfterFrameset); 1331 if (token->name() == htmlTag) { 1332 processHtmlStartTagForInBody(token); 1333 return; 1334 } 1335 if (token->name() == noframesTag) { 1336 processStartTagForInHead(token); 1337 return; 1338 } 1339 parseError(token); 1340 break; 1341 case InsertionMode::InSelectInTable: 1342 ASSERT(insertionMode() == InsertionMode::InSelectInTable); 1343 if (token->name() == captionTag 1344 || token->name() == tableTag 1345 || isTableBodyContextTag(token->name()) 1346 || token->name() == trTag 1347 || isTableCellContextTag(token->name())) { 1348 parseError(token); 1349 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName()); 1350 processEndTag(&endSelect); 1351 processStartTag(token); 1352 return; 1353 } 1354 FALLTHROUGH; 1355 case InsertionMode::InSelect: 1356 ASSERT(insertionMode() == InsertionMode::InSelect || insertionMode() == InsertionMode::InSelectInTable); 1357 if (token->name() == htmlTag) { 1358 processHtmlStartTagForInBody(token); 1359 return; 1360 } 1361 if (token->name() == optionTag) { 1362 if (isHTMLOptionElement(m_tree.currentStackItem()->node())) { 1363 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); 1364 processEndTag(&endOption); 1365 } 1366 m_tree.insertHTMLElement(token); 1367 return; 1368 } 1369 if (token->name() == optgroupTag) { 1370 if (isHTMLOptionElement(m_tree.currentStackItem()->node())) { 1371 AtomicHTMLToken endOption(HTMLToken::EndTag, optionTag.localName()); 1372 processEndTag(&endOption); 1373 } 1374 if (isHTMLOptGroupElement(m_tree.currentStackItem()->node())) { 1375 AtomicHTMLToken endOptgroup(HTMLToken::EndTag, optgroupTag.localName()); 1376 processEndTag(&endOptgroup); 1377 } 1378 
m_tree.insertHTMLElement(token); 1379 return; 1380 } 1381 if (token->name() == selectTag) { 1382 parseError(token); 1383 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName()); 1384 processEndTag(&endSelect); 1385 return; 1386 } 1387 if (token->name() == inputTag 1388 || token->name() == keygenTag 1389 || token->name() == textareaTag) { 1390 parseError(token); 1391 if (!m_tree.openElements()->inSelectScope(selectTag)) { 1392 ASSERT(isParsingFragment()); 1393 return; 1394 } 1395 AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName()); 1396 processEndTag(&endSelect); 1397 processStartTag(token); 1398 return; 1399 } 1400 if (token->name() == scriptTag) { 1401 bool didProcess = processStartTagForInHead(token); 1402 ASSERT_UNUSED(didProcess, didProcess); 1403 return; 1404 } 1405#if ENABLE(TEMPLATE_ELEMENT) 1406 if (token->name() == templateTag) { 1407 processTemplateStartTag(token); 1408 return; 1409 } 1410#endif 1411 break; 1412 case InsertionMode::InTableText: 1413 defaultForInTableText(); 1414 processStartTag(token); 1415 break; 1416 case InsertionMode::Text: 1417 ASSERT_NOT_REACHED(); 1418 break; 1419 case InsertionMode::TemplateContents: 1420#if ENABLE(TEMPLATE_ELEMENT) 1421 if (token->name() == templateTag) { 1422 processTemplateStartTag(token); 1423 return; 1424 } 1425 1426 if (token->name() == linkTag 1427 || token->name() == scriptTag 1428 || token->name() == styleTag 1429 || token->name() == metaTag) { 1430 processStartTagForInHead(token); 1431 return; 1432 } 1433 1434 InsertionMode insertionMode = InsertionMode::TemplateContents; 1435 if (token->name() == frameTag) 1436 insertionMode = InsertionMode::InFrameset; 1437 else if (token->name() == colTag) 1438 insertionMode = InsertionMode::InColumnGroup; 1439 else if (isCaptionColOrColgroupTag(token->name()) || isTableBodyContextTag(token->name())) 1440 insertionMode = InsertionMode::InTable; 1441 else if (token->name() == trTag) 1442 insertionMode = InsertionMode::InTableBody; 1443 else 
if (isTableCellContextTag(token->name())) 1444 insertionMode = InsertionMode::InRow; 1445 else 1446 insertionMode = InsertionMode::InBody; 1447 1448 ASSERT(insertionMode != InsertionMode::TemplateContents); 1449 ASSERT(m_templateInsertionModes.last() == InsertionMode::TemplateContents); 1450 m_templateInsertionModes.last() = insertionMode; 1451 setInsertionMode(insertionMode); 1452 1453 processStartTag(token); 1454#else 1455 ASSERT_NOT_REACHED(); 1456#endif 1457 break; 1458 } 1459} 1460 1461void HTMLTreeBuilder::processHtmlStartTagForInBody(AtomicHTMLToken* token) 1462{ 1463 parseError(token); 1464#if ENABLE(TEMPLATE_ELEMENT) 1465 if (m_tree.openElements()->hasTemplateInHTMLScope()) { 1466 ASSERT(isParsingTemplateContents()); 1467 return; 1468 } 1469#endif 1470 m_tree.insertHTMLHtmlStartTagInBody(token); 1471} 1472 1473bool HTMLTreeBuilder::processBodyEndTagForInBody(AtomicHTMLToken* token) 1474{ 1475 ASSERT(token->type() == HTMLToken::EndTag); 1476 ASSERT(token->name() == bodyTag); 1477 if (!m_tree.openElements()->inScope(bodyTag.localName())) { 1478 parseError(token); 1479 return false; 1480 } 1481 notImplemented(); // Emit a more specific parse error based on stack contents. 
1482 setInsertionMode(InsertionMode::AfterBody); 1483 return true; 1484} 1485 1486void HTMLTreeBuilder::processAnyOtherEndTagForInBody(AtomicHTMLToken* token) 1487{ 1488 ASSERT(token->type() == HTMLToken::EndTag); 1489 HTMLElementStack::ElementRecord* record = m_tree.openElements()->topRecord(); 1490 while (1) { 1491 RefPtr<HTMLStackItem> item = record->stackItem(); 1492 if (item->matchesHTMLTag(token->name())) { 1493 m_tree.generateImpliedEndTagsWithExclusion(token->name()); 1494 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1495 parseError(token); 1496 m_tree.openElements()->popUntilPopped(item->element()); 1497 return; 1498 } 1499 if (item->isSpecialNode()) { 1500 parseError(token); 1501 return; 1502 } 1503 record = record->next(); 1504 } 1505} 1506 1507// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody 1508void HTMLTreeBuilder::callTheAdoptionAgency(AtomicHTMLToken* token) 1509{ 1510 // The adoption agency algorithm is N^2. We limit the number of iterations 1511 // to stop from hanging the whole browser. This limit is specified in the 1512 // adoption agency algorithm: 1513 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-inbody 1514 static const int outerIterationLimit = 8; 1515 static const int innerIterationLimit = 3; 1516 1517 // 1, 2, 3 and 16 are covered by the for() loop. 1518 for (int i = 0; i < outerIterationLimit; ++i) { 1519 // 4. 1520 Element* formattingElement = m_tree.activeFormattingElements()->closestElementInScopeWithName(token->name()); 1521 // 4.a 1522 if (!formattingElement) 1523 return processAnyOtherEndTagForInBody(token); 1524 // 4.c 1525 if ((m_tree.openElements()->contains(formattingElement)) && !m_tree.openElements()->inScope(formattingElement)) { 1526 parseError(token); 1527 notImplemented(); // Check the stack of open elements for a more specific parse error. 
1528 return; 1529 } 1530 // 4.b 1531 HTMLElementStack::ElementRecord* formattingElementRecord = m_tree.openElements()->find(formattingElement); 1532 if (!formattingElementRecord) { 1533 parseError(token); 1534 m_tree.activeFormattingElements()->remove(formattingElement); 1535 return; 1536 } 1537 // 4.d 1538 if (formattingElement != m_tree.currentElement()) 1539 parseError(token); 1540 // 5. 1541 HTMLElementStack::ElementRecord* furthestBlock = m_tree.openElements()->furthestBlockForFormattingElement(formattingElement); 1542 // 6. 1543 if (!furthestBlock) { 1544 m_tree.openElements()->popUntilPopped(formattingElement); 1545 m_tree.activeFormattingElements()->remove(formattingElement); 1546 return; 1547 } 1548 // 7. 1549 ASSERT(furthestBlock->isAbove(formattingElementRecord)); 1550 RefPtr<HTMLStackItem> commonAncestor = formattingElementRecord->next()->stackItem(); 1551 // 8. 1552 HTMLFormattingElementList::Bookmark bookmark = m_tree.activeFormattingElements()->bookmarkFor(formattingElement); 1553 // 9. 1554 HTMLElementStack::ElementRecord* node = furthestBlock; 1555 HTMLElementStack::ElementRecord* nextNode = node->next(); 1556 HTMLElementStack::ElementRecord* lastNode = furthestBlock; 1557 // 9.1, 9.2, 9.3 and 9.11 are covered by the for() loop. 1558 for (int i = 0; i < innerIterationLimit; ++i) { 1559 // 9.4 1560 node = nextNode; 1561 ASSERT(node); 1562 nextNode = node->next(); // Save node->next() for the next iteration in case node is deleted in 9.5. 
1563 // 9.5 1564 if (!m_tree.activeFormattingElements()->contains(node->element())) { 1565 m_tree.openElements()->remove(node->element()); 1566 node = 0; 1567 continue; 1568 } 1569 // 9.6 1570 if (node == formattingElementRecord) 1571 break; 1572 // 9.7 1573 RefPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(node->stackItem().get()); 1574 1575 HTMLFormattingElementList::Entry* nodeEntry = m_tree.activeFormattingElements()->find(node->element()); 1576 nodeEntry->replaceElement(newItem); 1577 node->replaceElement(newItem.release()); 1578 1579 // 9.8 1580 if (lastNode == furthestBlock) 1581 bookmark.moveToAfter(nodeEntry); 1582 // 9.9 1583 m_tree.reparent(*node, *lastNode); 1584 // 9.10 1585 lastNode = node; 1586 } 1587 // 10. 1588 m_tree.insertAlreadyParsedChild(*commonAncestor, *lastNode); 1589 // 11. 1590 RefPtr<HTMLStackItem> newItem = m_tree.createElementFromSavedToken(formattingElementRecord->stackItem().get()); 1591 // 12. 1592 m_tree.takeAllChildren(*newItem, *furthestBlock); 1593 // 13. 1594 m_tree.reparent(*furthestBlock, *newItem); 1595 // 14. 1596 m_tree.activeFormattingElements()->swapTo(formattingElement, newItem, bookmark); 1597 // 15. 
1598 m_tree.openElements()->remove(formattingElement); 1599 m_tree.openElements()->insertAbove(newItem, furthestBlock); 1600 } 1601} 1602 1603void HTMLTreeBuilder::resetInsertionModeAppropriately() 1604{ 1605 // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#reset-the-insertion-mode-appropriately 1606 bool last = false; 1607 HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord(); 1608 while (1) { 1609 RefPtr<HTMLStackItem> item = nodeRecord->stackItem(); 1610 if (item->node() == m_tree.openElements()->rootNode()) { 1611 last = true; 1612#if ENABLE(TEMPLATE_ELEMENT) 1613 bool shouldCreateItem = isParsingFragment(); 1614#else 1615 ASSERT(isParsingFragment()); 1616 bool shouldCreateItem = true; 1617#endif 1618 if (shouldCreateItem) 1619 item = HTMLStackItem::create(m_fragmentContext.contextElement(), HTMLStackItem::ItemForContextElement); 1620 } 1621#if ENABLE(TEMPLATE_ELEMENT) 1622 if (item->hasTagName(templateTag)) 1623 return setInsertionMode(m_templateInsertionModes.last()); 1624#endif 1625 if (item->hasTagName(selectTag)) { 1626#if ENABLE(TEMPLATE_ELEMENT) 1627 if (!last) { 1628 while (item->node() != m_tree.openElements()->rootNode() && !item->hasTagName(templateTag)) { 1629 nodeRecord = nodeRecord->next(); 1630 item = nodeRecord->stackItem(); 1631 if (isHTMLTableElement(item->node())) 1632 return setInsertionMode(InsertionMode::InSelectInTable); 1633 } 1634 } 1635#endif 1636 return setInsertionMode(InsertionMode::InSelect); 1637 } 1638 if (item->hasTagName(tdTag) || item->hasTagName(thTag)) 1639 return setInsertionMode(InsertionMode::InCell); 1640 if (item->hasTagName(trTag)) 1641 return setInsertionMode(InsertionMode::InRow); 1642 if (item->hasTagName(tbodyTag) || item->hasTagName(theadTag) || item->hasTagName(tfootTag)) 1643 return setInsertionMode(InsertionMode::InTableBody); 1644 if (item->hasTagName(captionTag)) 1645 return setInsertionMode(InsertionMode::InCaption); 1646 if 
(item->hasTagName(colgroupTag)) { 1647 return setInsertionMode(InsertionMode::InColumnGroup); 1648 } 1649 if (isHTMLTableElement(item->node())) 1650 return setInsertionMode(InsertionMode::InTable); 1651 if (item->hasTagName(headTag)) { 1652#if ENABLE(TEMPLATE_ELEMENT) 1653 if (!m_fragmentContext.fragment() || m_fragmentContext.contextElement() != item->node()) 1654 return setInsertionMode(InsertionMode::InHead); 1655#endif 1656 return setInsertionMode(InsertionMode::InBody); 1657 } 1658 if (item->hasTagName(bodyTag)) 1659 return setInsertionMode(InsertionMode::InBody); 1660 if (item->hasTagName(framesetTag)) { 1661 return setInsertionMode(InsertionMode::InFrameset); 1662 } 1663 if (item->hasTagName(htmlTag)) { 1664 if (m_tree.headStackItem()) 1665 return setInsertionMode(InsertionMode::AfterHead); 1666 ASSERT(isParsingFragment()); 1667 return setInsertionMode(InsertionMode::BeforeHead); 1668 } 1669 if (last) { 1670 ASSERT(isParsingFragment()); 1671 return setInsertionMode(InsertionMode::InBody); 1672 } 1673 nodeRecord = nodeRecord->next(); 1674 } 1675} 1676 1677void HTMLTreeBuilder::processEndTagForInTableBody(AtomicHTMLToken* token) 1678{ 1679 ASSERT(token->type() == HTMLToken::EndTag); 1680 if (isTableBodyContextTag(token->name())) { 1681 if (!m_tree.openElements()->inTableScope(token->name())) { 1682 parseError(token); 1683 return; 1684 } 1685 m_tree.openElements()->popUntilTableBodyScopeMarker(); 1686 m_tree.openElements()->pop(); 1687 setInsertionMode(InsertionMode::InTable); 1688 return; 1689 } 1690 if (token->name() == tableTag) { 1691 // FIXME: This is slow. 
1692 if (!m_tree.openElements()->inTableScope(tbodyTag) && !m_tree.openElements()->inTableScope(theadTag) && !m_tree.openElements()->inTableScope(tfootTag)) { 1693 ASSERT(isParsingFragmentOrTemplateContents()); 1694 parseError(token); 1695 return; 1696 } 1697 m_tree.openElements()->popUntilTableBodyScopeMarker(); 1698 ASSERT(isTableBodyContextTag(m_tree.currentStackItem()->localName())); 1699 processFakeEndTag(m_tree.currentStackItem()->localName()); 1700 processEndTag(token); 1701 return; 1702 } 1703 if (token->name() == bodyTag 1704 || isCaptionColOrColgroupTag(token->name()) 1705 || token->name() == htmlTag 1706 || isTableCellContextTag(token->name()) 1707 || token->name() == trTag) { 1708 parseError(token); 1709 return; 1710 } 1711 processEndTagForInTable(token); 1712} 1713 1714void HTMLTreeBuilder::processEndTagForInRow(AtomicHTMLToken* token) 1715{ 1716 ASSERT(token->type() == HTMLToken::EndTag); 1717 if (token->name() == trTag) { 1718 processTrEndTagForInRow(); 1719 return; 1720 } 1721 if (token->name() == tableTag) { 1722 if (!processTrEndTagForInRow()) { 1723 ASSERT(isParsingFragmentOrTemplateContents()); 1724 return; 1725 } 1726 ASSERT(insertionMode() == InsertionMode::InTableBody); 1727 processEndTag(token); 1728 return; 1729 } 1730 if (isTableBodyContextTag(token->name())) { 1731 if (!m_tree.openElements()->inTableScope(token->name())) { 1732 parseError(token); 1733 return; 1734 } 1735 processFakeEndTag(trTag); 1736 ASSERT(insertionMode() == InsertionMode::InTableBody); 1737 processEndTag(token); 1738 return; 1739 } 1740 if (token->name() == bodyTag 1741 || isCaptionColOrColgroupTag(token->name()) 1742 || token->name() == htmlTag 1743 || isTableCellContextTag(token->name())) { 1744 parseError(token); 1745 return; 1746 } 1747 processEndTagForInTable(token); 1748} 1749 1750void HTMLTreeBuilder::processEndTagForInCell(AtomicHTMLToken* token) 1751{ 1752 ASSERT(token->type() == HTMLToken::EndTag); 1753 if (isTableCellContextTag(token->name())) { 1754 if 
(!m_tree.openElements()->inTableScope(token->name())) { 1755 parseError(token); 1756 return; 1757 } 1758 m_tree.generateImpliedEndTags(); 1759 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1760 parseError(token); 1761 m_tree.openElements()->popUntilPopped(token->name()); 1762 m_tree.activeFormattingElements()->clearToLastMarker(); 1763 setInsertionMode(InsertionMode::InRow); 1764 return; 1765 } 1766 if (token->name() == bodyTag 1767 || isCaptionColOrColgroupTag(token->name()) 1768 || token->name() == htmlTag) { 1769 parseError(token); 1770 return; 1771 } 1772 if (token->name() == tableTag 1773 || token->name() == trTag 1774 || isTableBodyContextTag(token->name())) { 1775 if (!m_tree.openElements()->inTableScope(token->name())) { 1776#if ENABLE(TEMPLATE_ELEMENT) 1777 ASSERT(isTableBodyContextTag(token->name()) || m_tree.openElements()->inTableScope(templateTag) || isParsingFragment()); 1778#else 1779 ASSERT(isTableBodyContextTag(token->name()) || isParsingFragment()); 1780#endif 1781 parseError(token); 1782 return; 1783 } 1784 closeTheCell(); 1785 processEndTag(token); 1786 return; 1787 } 1788 processEndTagForInBody(token); 1789} 1790 1791void HTMLTreeBuilder::processEndTagForInBody(AtomicHTMLToken* token) 1792{ 1793 ASSERT(token->type() == HTMLToken::EndTag); 1794 if (token->name() == bodyTag) { 1795 processBodyEndTagForInBody(token); 1796 return; 1797 } 1798 if (token->name() == htmlTag) { 1799 AtomicHTMLToken endBody(HTMLToken::EndTag, bodyTag.localName()); 1800 if (processBodyEndTagForInBody(&endBody)) 1801 processEndTag(token); 1802 return; 1803 } 1804 if (token->name() == addressTag 1805 || token->name() == articleTag 1806 || token->name() == asideTag 1807 || token->name() == blockquoteTag 1808 || token->name() == buttonTag 1809 || token->name() == centerTag 1810 || token->name() == detailsTag 1811 || token->name() == dirTag 1812 || token->name() == divTag 1813 || token->name() == dlTag 1814 || token->name() == fieldsetTag 1815 || 
token->name() == figcaptionTag 1816 || token->name() == figureTag 1817 || token->name() == footerTag 1818 || token->name() == headerTag 1819 || token->name() == hgroupTag 1820 || token->name() == listingTag 1821 || token->name() == mainTag 1822 || token->name() == menuTag 1823 || token->name() == navTag 1824 || token->name() == olTag 1825 || token->name() == preTag 1826 || token->name() == sectionTag 1827 || token->name() == summaryTag 1828 || token->name() == ulTag) { 1829 if (!m_tree.openElements()->inScope(token->name())) { 1830 parseError(token); 1831 return; 1832 } 1833 m_tree.generateImpliedEndTags(); 1834 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1835 parseError(token); 1836 m_tree.openElements()->popUntilPopped(token->name()); 1837 return; 1838 } 1839 if (token->name() == formTag) { 1840 if (!isParsingTemplateContents()) { 1841 RefPtr<Element> node = m_tree.takeForm(); 1842 if (!node || !m_tree.openElements()->inScope(node.get())) { 1843 parseError(token); 1844 return; 1845 } 1846 m_tree.generateImpliedEndTags(); 1847 if (m_tree.currentNode() != node.get()) 1848 parseError(token); 1849 m_tree.openElements()->remove(node.get()); 1850 } else { 1851 if (!m_tree.openElements()->inScope(token->name())) { 1852 parseError(token); 1853 return; 1854 } 1855 m_tree.generateImpliedEndTags(); 1856 if (!m_tree.currentNode()->hasTagName(formTag)) 1857 parseError(token); 1858 m_tree.openElements()->popUntilPopped(token->name()); 1859 } 1860 } 1861 if (token->name() == pTag) { 1862 if (!m_tree.openElements()->inButtonScope(token->name())) { 1863 parseError(token); 1864 processFakeStartTag(pTag); 1865 ASSERT(m_tree.openElements()->inScope(token->name())); 1866 processEndTag(token); 1867 return; 1868 } 1869 m_tree.generateImpliedEndTagsWithExclusion(token->name()); 1870 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1871 parseError(token); 1872 m_tree.openElements()->popUntilPopped(token->name()); 1873 return; 1874 } 1875 if (token->name() 
== liTag) { 1876 if (!m_tree.openElements()->inListItemScope(token->name())) { 1877 parseError(token); 1878 return; 1879 } 1880 m_tree.generateImpliedEndTagsWithExclusion(token->name()); 1881 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1882 parseError(token); 1883 m_tree.openElements()->popUntilPopped(token->name()); 1884 return; 1885 } 1886 if (token->name() == ddTag 1887 || token->name() == dtTag) { 1888 if (!m_tree.openElements()->inScope(token->name())) { 1889 parseError(token); 1890 return; 1891 } 1892 m_tree.generateImpliedEndTagsWithExclusion(token->name()); 1893 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1894 parseError(token); 1895 m_tree.openElements()->popUntilPopped(token->name()); 1896 return; 1897 } 1898 if (isNumberedHeaderTag(token->name())) { 1899 if (!m_tree.openElements()->hasNumberedHeaderElementInScope()) { 1900 parseError(token); 1901 return; 1902 } 1903 m_tree.generateImpliedEndTags(); 1904 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1905 parseError(token); 1906 m_tree.openElements()->popUntilNumberedHeaderElementPopped(); 1907 return; 1908 } 1909 if (isFormattingTag(token->name())) { 1910 callTheAdoptionAgency(token); 1911 return; 1912 } 1913 if (token->name() == appletTag 1914 || token->name() == marqueeTag 1915 || token->name() == objectTag) { 1916 if (!m_tree.openElements()->inScope(token->name())) { 1917 parseError(token); 1918 return; 1919 } 1920 m_tree.generateImpliedEndTags(); 1921 if (!m_tree.currentStackItem()->matchesHTMLTag(token->name())) 1922 parseError(token); 1923 m_tree.openElements()->popUntilPopped(token->name()); 1924 m_tree.activeFormattingElements()->clearToLastMarker(); 1925 return; 1926 } 1927 if (token->name() == brTag) { 1928 parseError(token); 1929 processFakeStartTag(brTag); 1930 return; 1931 } 1932#if ENABLE(TEMPLATE_ELEMENT) 1933 if (token->name() == templateTag) { 1934 processTemplateEndTag(token); 1935 return; 1936 } 1937#endif 1938 
// None of the tag-specific checks above matched, so use the generic "any other end tag" handling.
    processAnyOtherEndTagForInBody(token);
}

// Closes the open <caption> element.
// Returns false (and changes nothing) when no caption is in table scope; otherwise generates implied end
// tags, pops the open-element stack through the caption, clears the active formatting elements back to
// the last marker, switches to InsertionMode::InTable and returns true.
bool HTMLTreeBuilder::processCaptionEndTagForInCaption()
{
    if (!m_tree.openElements()->inTableScope(captionTag.localName())) {
        ASSERT(isParsingFragment());
        // FIXME: parse error
        return false;
    }
    m_tree.generateImpliedEndTags();
    // FIXME: parse error if (!m_tree.currentStackItem()->hasTagName(captionTag))
    m_tree.openElements()->popUntilPopped(captionTag.localName());
    m_tree.activeFormattingElements()->clearToLastMarker();
    setInsertionMode(InsertionMode::InTable);
    return true;
}

// Closes the open <tr> element.
// Returns false (and changes nothing) when no <tr> is in table scope; otherwise pops back to the table
// row scope marker, pops the <tr> itself, switches to InsertionMode::InTableBody and returns true.
bool HTMLTreeBuilder::processTrEndTagForInRow()
{
    if (!m_tree.openElements()->inTableScope(trTag)) {
        ASSERT(isParsingFragmentOrTemplateContents());
        // FIXME: parse error
        return false;
    }
    m_tree.openElements()->popUntilTableRowScopeMarker();
    // After clearing back to the row scope marker the current node is expected to be the <tr>.
    ASSERT(m_tree.currentStackItem()->hasTagName(trTag));
    m_tree.openElements()->pop();
    setInsertionMode(InsertionMode::InTableBody);
    return true;
}

// Closes the open <table> element.
// Returns false (and changes nothing) when no <table> is in table scope; otherwise pops the open-element
// stack through the table, recomputes the insertion mode and returns true.
bool HTMLTreeBuilder::processTableEndTagForInTable()
{
    if (!m_tree.openElements()->inTableScope(tableTag)) {
        ASSERT(isParsingFragmentOrTemplateContents());
        // FIXME: parse error.
        return false;
    }
    m_tree.openElements()->popUntilPopped(tableTag.localName());
    resetInsertionModeAppropriately();
    return true;
}

// Handles an end tag token while in InsertionMode::InTable.
// </table> closes the table; end tags for body, caption, col, colgroup, html, table body sections,
// table cells and tr are reported as parse errors and ignored; anything else is a parse error that is
// then processed with the "in body" rules while insertions are redirected to the foster parent.
void HTMLTreeBuilder::processEndTagForInTable(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::EndTag);
    if (token->name() == tableTag) {
        processTableEndTagForInTable();
        return;
    }
    if (token->name() == bodyTag
        || isCaptionColOrColgroupTag(token->name())
        || token->name() == htmlTag
        || isTableBodyContextTag(token->name())
        || isTableCellContextTag(token->name())
        || token->name() == trTag) {
        parseError(token);
        return;
    }
    parseError(token);
    // Is this redirection necessary here?
    HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
    processEndTagForInBody(token);
}

// Dispatches an end tag token according to the current insertion mode.
// The early modes (Initial through AfterHead) either ignore the token with a parse error or apply
// their default action and deliberately fall through into the next mode's handling; each case
// re-asserts the mode it expects to be in after such a fall-through. Several modes change state and
// then re-enter processEndTag() so the same token is handled again under the new mode.
void HTMLTreeBuilder::processEndTag(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::EndTag);
    switch (insertionMode()) {
    case InsertionMode::Initial:
        ASSERT(insertionMode() == InsertionMode::Initial);
        defaultForInitial();
        FALLTHROUGH;
    case InsertionMode::BeforeHTML:
        ASSERT(insertionMode() == InsertionMode::BeforeHTML);
        // Only </head>, </body>, </html> and </br> continue; every other end tag is dropped here.
        if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHTML();
        FALLTHROUGH;
    case InsertionMode::BeforeHead:
        ASSERT(insertionMode() == InsertionMode::BeforeHead);
        if (token->name() != headTag && token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForBeforeHead();
        FALLTHROUGH;
    case InsertionMode::InHead:
        ASSERT(insertionMode() == InsertionMode::InHead);
        // FIXME: This case should be broken out into processEndTagForInHead,
        // because other end tag cases now refer to it ("process the token for using the rules of the "in head" insertion mode").
        // but because the logic falls through to InsertionMode::AfterHead, that gets a little messy.
#if ENABLE(TEMPLATE_ELEMENT)
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
#endif
        if (token->name() == headTag) {
            m_tree.openElements()->popHTMLHeadElement();
            setInsertionMode(InsertionMode::AfterHead);
            return;
        }
        if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForInHead();
        FALLTHROUGH;
    case InsertionMode::AfterHead:
        ASSERT(insertionMode() == InsertionMode::AfterHead);
        if (token->name() != bodyTag && token->name() != htmlTag && token->name() != brTag) {
            parseError(token);
            return;
        }
        defaultForAfterHead();
        FALLTHROUGH;
    case InsertionMode::InBody:
        ASSERT(insertionMode() == InsertionMode::InBody);
        processEndTagForInBody(token);
        break;
    case InsertionMode::InTable:
        ASSERT(insertionMode() == InsertionMode::InTable);
        processEndTagForInTable(token);
        break;
    case InsertionMode::InCaption:
        ASSERT(insertionMode() == InsertionMode::InCaption);
        if (token->name() == captionTag) {
            processCaptionEndTagForInCaption();
            return;
        }
        if (token->name() == tableTag) {
            // </table> inside a caption: close the caption first, then reprocess the
            // same token under the mode that closing the caption selected.
            parseError(token);
            if (!processCaptionEndTagForInCaption()) {
                ASSERT(isParsingFragment());
                return;
            }
            processEndTag(token);
            return;
        }
        if (token->name() == bodyTag
            || token->name() == colTag
            || token->name() == colgroupTag
            || token->name() == htmlTag
            || isTableBodyContextTag(token->name())
            || isTableCellContextTag(token->name())
            || token->name() == trTag) {
            parseError(token);
            return;
        }
        processEndTagForInBody(token);
        break;
    case InsertionMode::InColumnGroup:
        ASSERT(insertionMode() == InsertionMode::InColumnGroup);
        if (token->name() == colgroupTag) {
            processColgroupEndTagForInColumnGroup();
            return;
        }
        if (token->name() == colTag) {
            parseError(token);
            return;
        }
#if ENABLE(TEMPLATE_ELEMENT)
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
#endif
        // Any other end tag: try to close the colgroup, and only if that succeeded
        // reprocess the token under the resulting mode.
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            return;
        }
        processEndTag(token);
        break;
    case InsertionMode::InRow:
        ASSERT(insertionMode() == InsertionMode::InRow);
        processEndTagForInRow(token);
        break;
    case InsertionMode::InCell:
        ASSERT(insertionMode() == InsertionMode::InCell);
        processEndTagForInCell(token);
        break;
    case InsertionMode::InTableBody:
        ASSERT(insertionMode() == InsertionMode::InTableBody);
        processEndTagForInTableBody(token);
        break;
    case InsertionMode::AfterBody:
        ASSERT(insertionMode() == InsertionMode::AfterBody);
        if (token->name() == htmlTag) {
            if (isParsingFragment()) {
                parseError(token);
                return;
            }
            setInsertionMode(InsertionMode::AfterAfterBody);
            return;
        }
        FALLTHROUGH;
    case InsertionMode::AfterAfterBody:
        ASSERT(insertionMode() == InsertionMode::AfterBody || insertionMode() == InsertionMode::AfterAfterBody);
        // Unexpected end tag after the body: report it, go back to InBody and reprocess the token there.
        parseError(token);
        setInsertionMode(InsertionMode::InBody);
        processEndTag(token);
        break;
    case InsertionMode::InHeadNoscript:
        ASSERT(insertionMode() == InsertionMode::InHeadNoscript);
        if (token->name() == noscriptTag) {
            ASSERT(m_tree.currentStackItem()->hasTagName(noscriptTag));
            m_tree.openElements()->pop();
            ASSERT(m_tree.currentStackItem()->hasTagName(headTag));
            setInsertionMode(InsertionMode::InHead);
            return;
        }
        if (token->name() != brTag) {
            parseError(token);
            return;
        }
        // </br>: close the <noscript> with a synthesized end tag, then reprocess the token.
        defaultForInHeadNoscript();
        processToken(token);
        break;
    case InsertionMode::Text:
        if (token->name() == scriptTag) {
            // Pause ourselves so that parsing stops until the script can be processed by the caller.
            ASSERT(m_tree.currentStackItem()->hasTagName(scriptTag));
            if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
                m_scriptToProcess = m_tree.currentElement();
            m_tree.openElements()->pop();
            setInsertionMode(m_originalInsertionMode);

            if (m_parser.tokenizer()) {
                // This token will not have been created by the tokenizer if a
                // self-closing script tag was encountered and pre-HTML5 parser
                // quirks are enabled. We must set the tokenizer's state to
                // DataState explicitly if the tokenizer didn't have a chance to.
                ASSERT(m_parser.tokenizer()->state() == HTMLTokenizer::DataState || m_options.usePreHTML5ParserQuirks);
                m_parser.tokenizer()->setState(HTMLTokenizer::DataState);
            }
            return;
        }
        // Any other end tag: pop the current element and return to the mode saved when Text mode was entered.
        m_tree.openElements()->pop();
        setInsertionMode(m_originalInsertionMode);
        break;
    case InsertionMode::InFrameset:
        ASSERT(insertionMode() == InsertionMode::InFrameset);
        if (token->name() == framesetTag) {
            // The end tag is ignored when the current node is the root node or (with templates
            // enabled) when a template is in HTML scope.
            bool ignoreFramesetForFragmentParsing = m_tree.currentIsRootNode();
#if ENABLE(TEMPLATE_ELEMENT)
            ignoreFramesetForFragmentParsing = ignoreFramesetForFragmentParsing || m_tree.openElements()->hasTemplateInHTMLScope();
#endif
            if (ignoreFramesetForFragmentParsing) {
                ASSERT(isParsingFragmentOrTemplateContents());
                parseError(token);
                return;
            }
            m_tree.openElements()->pop();
            // Leave InFrameset only once the outermost frameset has been closed (and not for fragments).
            if (!isParsingFragment() && !m_tree.currentStackItem()->hasTagName(framesetTag))
                setInsertionMode(InsertionMode::AfterFrameset);
            return;
        }
#if ENABLE(TEMPLATE_ELEMENT)
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
#endif
        break;
    case InsertionMode::AfterFrameset:
        ASSERT(insertionMode() == InsertionMode::AfterFrameset);
        if (token->name() == htmlTag) {
            setInsertionMode(InsertionMode::AfterAfterFrameset);
            return;
        }
        FALLTHROUGH;
    case InsertionMode::AfterAfterFrameset:
        ASSERT(insertionMode() == InsertionMode::AfterFrameset || insertionMode() == InsertionMode::AfterAfterFrameset);
        parseError(token);
        break;
    case InsertionMode::InSelectInTable:
        ASSERT(insertionMode() == InsertionMode::InSelectInTable);
        if (token->name() == captionTag
            || token->name() == tableTag
            || isTableBodyContextTag(token->name())
            || token->name() == trTag
            || isTableCellContextTag(token->name())) {
            parseError(token);
            // If the named table element is actually open, close the <select> with a
            // synthesized end tag and then reprocess the original token.
            if (m_tree.openElements()->inTableScope(token->name())) {
                AtomicHTMLToken endSelect(HTMLToken::EndTag, selectTag.localName());
                processEndTag(&endSelect);
                processEndTag(token);
            }
            return;
        }
        FALLTHROUGH;
    case InsertionMode::InSelect:
        ASSERT(insertionMode() == InsertionMode::InSelect || insertionMode() == InsertionMode::InSelectInTable);
        if (token->name() == optgroupTag) {
            // An <option> directly inside an <optgroup> is closed implicitly first.
            if (isHTMLOptionElement(m_tree.currentStackItem()->node()) && m_tree.oneBelowTop() && isHTMLOptGroupElement(m_tree.oneBelowTop()->node()))
                processFakeEndTag(optionTag);
            if (isHTMLOptGroupElement(m_tree.currentStackItem()->node())) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token->name() == optionTag) {
            if (isHTMLOptionElement(m_tree.currentStackItem()->node())) {
                m_tree.openElements()->pop();
                return;
            }
            parseError(token);
            return;
        }
        if (token->name() == selectTag) {
            if (!m_tree.openElements()->inSelectScope(token->name())) {
                ASSERT(isParsingFragment());
                parseError(token);
                return;
            }
            m_tree.openElements()->popUntilPopped(selectTag.localName());
            resetInsertionModeAppropriately();
            return;
        }
#if ENABLE(TEMPLATE_ELEMENT)
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
#endif
        break;
    case InsertionMode::InTableText:
        // Flush the pending table characters (which restores the saved mode), then reprocess the token.
        defaultForInTableText();
        processEndTag(token);
        break;
    case InsertionMode::TemplateContents:
#if ENABLE(TEMPLATE_ELEMENT)
        if (token->name() == templateTag) {
            processTemplateEndTag(token);
            return;
        }
#else
        ASSERT_NOT_REACHED();
#endif
        break;
    }
}

// Inserts a comment token at the location dictated by the current insertion mode:
// on the document (Initial, BeforeHTML, AfterAfterBody, AfterAfterFrameset), on the <html>
// element (AfterBody), or at the normal insertion point otherwise. In InTableText the pending
// table characters are flushed first and the comment is then reprocessed.
void HTMLTreeBuilder::processComment(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::Comment);
    if (m_insertionMode == InsertionMode::Initial
        || m_insertionMode == InsertionMode::BeforeHTML
        || m_insertionMode == InsertionMode::AfterAfterBody
        || m_insertionMode == InsertionMode::AfterAfterFrameset) {
        m_tree.insertCommentOnDocument(token);
        return;
    }
    if (m_insertionMode == InsertionMode::AfterBody) {
        m_tree.insertCommentOnHTMLHtmlElement(token);
        return;
    }
    if (m_insertionMode == InsertionMode::InTableText) {
        defaultForInTableText();
        processComment(token);
        return;
    }
    m_tree.insertComment(token);
}

// Handles a character token by wrapping it in an ExternalCharacterTokenBuffer and
// handing that buffer to processCharacterBuffer().
void HTMLTreeBuilder::processCharacter(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::Character);
    ExternalCharacterTokenBuffer buffer(token);
    processCharacterBuffer(buffer);
}

// FIXME: Extract the following iOS-specific code into a separate file.
#if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS)
// From the string 4089961010, creates a link of the form <a href="tel:4089961010">4089961010</a> and inserts it.
void HTMLTreeBuilder::insertPhoneNumberLink(const String& string)
{
    // Build the href="tel:<number>" attribute for the synthesized <a> element.
    Vector<Attribute> attributes;
    attributes.append(Attribute(HTMLNames::hrefAttr, ASCIILiteral("tel:") + string));

    const AtomicString& aTagLocalName = aTag.localName();
    AtomicHTMLToken aStartToken(HTMLToken::StartTag, aTagLocalName, attributes);
    AtomicHTMLToken aEndToken(HTMLToken::EndTag, aTagLocalName);

    // Feed the synthesized <a>, the number text and </a> through the normal token processing.
    processStartTag(&aStartToken);
    m_tree.executeQueuedTasks();
    m_tree.insertTextNode(string);
    processEndTag(&aEndToken);
}

// Locates the phone numbers in the string and deals with them:
// 1. Appends the text before the phone number as a text node.
// 2. Wraps the phone number in a tel: link.
// 3. Goes back to step 1 if a phone number is found in the rest of the string.
// 4. Appends the rest of the string as a text node.
void HTMLTreeBuilder::linkifyPhoneNumbers(const String& string)
{
    ASSERT(TelephoneNumberDetector::isSupported());

    // relativeStartPosition and relativeEndPosition are the endpoints of the phone number range,
    // relative to the scannerPosition
    unsigned length = string.length();
    unsigned scannerPosition = 0;
    int relativeStartPosition = 0;
    int relativeEndPosition = 0;

    auto characters = StringView(string).upconvertedCharacters();

    // While there's a phone number in the rest of the string...
    while (scannerPosition < length && TelephoneNumberDetector::find(&characters[scannerPosition], length - scannerPosition, &relativeStartPosition, &relativeEndPosition)) {
        // The convention in the Data Detectors framework is that the end position is the first character NOT in the phone number
        // (that is, the length of the range is relativeEndPosition - relativeStartPosition). So subtract 1 to get the same
        // convention as the old WebCore phone number parser (so that the rest of the code is still valid if we want to go back
        // to the old parser).
        --relativeEndPosition;

        ASSERT(scannerPosition + relativeEndPosition < length);

        // Text preceding the number, then the number itself wrapped in a tel: link.
        m_tree.insertTextNode(string.substring(scannerPosition, relativeStartPosition));
        insertPhoneNumberLink(string.substring(scannerPosition + relativeStartPosition, relativeEndPosition - relativeStartPosition + 1));

        // Resume scanning just past the last character of the number.
        scannerPosition += relativeEndPosition + 1;
    }

    // Append the rest as a text node.
    // scannerPosition is still 0 when no phone number was found, in which case the whole string is inserted unchanged.
    if (scannerPosition > 0) {
        if (scannerPosition < length) {
            String after = string.substring(scannerPosition, length - scannerPosition);
            m_tree.insertTextNode(after);
        }
    } else
        m_tree.insertTextNode(string);
}

// Returns true if the given node is one inside which phone numbers must not be linkified:
// nodes reporting isLink(), comment nodes, form control elements, and <script>, <style>, <tt>, <pre>, <code>.
// This checks the single node only; shouldParseTelephoneNumbersInNode() applies it along the ancestor chain.
static inline bool disallowTelephoneNumberParsing(const Node& node)
{
    return node.isLink()
        || node.nodeType() == Node::COMMENT_NODE
        || node.hasTagName(scriptTag)
        || (node.isHTMLElement() && toHTMLElement(node).isFormControlElement())
        || node.hasTagName(styleTag)
        || node.hasTagName(ttTag)
        || node.hasTagName(preTag)
        || node.hasTagName(codeTag);
}

// Looks at the node and then its ancestors to determine whether we're inside an element which
// disallows parsing phone numbers. Returns false as soon as one such element is found.
static inline bool shouldParseTelephoneNumbersInNode(const ContainerNode& node)
{
    const ContainerNode* currentNode = &node;
    do {
        if (currentNode->isElementNode() && disallowTelephoneNumberParsing(*currentNode))
            return false;
        currentNode = currentNode->parentNode();
    } while (currentNode);
    return true;
}
#endif // ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS)

// Processes buffered character data according to the current insertion mode.
// Modes that must change state before the characters can be handled either fall through to the
// next case or jump back to ReprocessBuffer so the remaining characters are dispatched again
// under the new mode. The buffer is consumed as it is processed.
void HTMLTreeBuilder::processCharacterBuffer(ExternalCharacterTokenBuffer& buffer)
{
ReprocessBuffer:
    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#parsing-main-inbody
    // Note that this logic is different than the generic \r\n collapsing
    // handled in the input stream preprocessor. This logic is here as an
    // "authoring convenience" so folks can write:
    //
    // <pre>
    // lorem ipsum
    // lorem ipsum
    // </pre>
    //
    // without getting an extra newline at the start of their <pre> element.
    if (m_shouldSkipLeadingNewline) {
        m_shouldSkipLeadingNewline = false;
        buffer.skipAtMostOneLeadingNewline();
        if (buffer.isEmpty())
            return;
    }

    switch (insertionMode()) {
    case InsertionMode::Initial: {
        ASSERT(insertionMode() == InsertionMode::Initial);
        // Leading whitespace is discarded in the pre-head modes; anything else triggers the mode's default action.
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForInitial();
        FALLTHROUGH;
    }
    case InsertionMode::BeforeHTML: {
        ASSERT(insertionMode() == InsertionMode::BeforeHTML);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHTML();
        FALLTHROUGH;
    }
    case InsertionMode::BeforeHead: {
        ASSERT(insertionMode() == InsertionMode::BeforeHead);
        buffer.skipLeadingWhitespace();
        if (buffer.isEmpty())
            return;
        defaultForBeforeHead();
        FALLTHROUGH;
    }
    case InsertionMode::InHead: {
        ASSERT(insertionMode() == InsertionMode::InHead);
        // From here on leading whitespace is kept as a text node rather than dropped.
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHead();
        FALLTHROUGH;
    }
    case InsertionMode::AfterHead: {
        ASSERT(insertionMode() == InsertionMode::AfterHead);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForAfterHead();
        FALLTHROUGH;
    }
    case InsertionMode::InBody:
    case InsertionMode::InCaption:
    case InsertionMode::TemplateContents:
    case InsertionMode::InCell: {
#if ENABLE(TEMPLATE_ELEMENT)
        ASSERT(insertionMode() == InsertionMode::InBody || insertionMode() == InsertionMode::InCaption || insertionMode() == InsertionMode::InCell || insertionMode() == InsertionMode::TemplateContents);
#else
        ASSERT(insertionMode() != InsertionMode::TemplateContents);
        ASSERT(insertionMode() == InsertionMode::InBody || insertionMode() == InsertionMode::InCaption || insertionMode() == InsertionMode::InCell);
#endif
        processCharacterBufferForInBody(buffer);
        break;
    }
    case InsertionMode::InTable:
    case InsertionMode::InTableBody:
    case InsertionMode::InRow: {
        ASSERT(insertionMode() == InsertionMode::InTable || insertionMode() == InsertionMode::InTableBody || insertionMode() == InsertionMode::InRow);
        ASSERT(m_pendingTableCharacters.isEmpty());
        // When the current node is a table, tbody, tfoot, thead or tr, the characters are collected
        // in InTableText mode; otherwise they are handled as in-body text with foster parenting.
        if (m_tree.currentStackItem()->isElementNode()
            && (isHTMLTableElement(m_tree.currentStackItem()->node())
                || m_tree.currentStackItem()->hasTagName(HTMLNames::tbodyTag)
                || m_tree.currentStackItem()->hasTagName(HTMLNames::tfootTag)
                || m_tree.currentStackItem()->hasTagName(HTMLNames::theadTag)
                || m_tree.currentStackItem()->hasTagName(HTMLNames::trTag))) {
            m_originalInsertionMode = m_insertionMode;
            setInsertionMode(InsertionMode::InTableText);
            // Note that we fall through to the InsertionMode::InTableText case below.
        } else {
            HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
            processCharacterBufferForInBody(buffer);
            break;
        }
        FALLTHROUGH;
    }
    case InsertionMode::InTableText: {
        // Accumulate the characters; they are flushed later by defaultForInTableText().
        buffer.giveRemainingTo(m_pendingTableCharacters);
        break;
    }
    case InsertionMode::InColumnGroup: {
        ASSERT(insertionMode() == InsertionMode::InColumnGroup);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        if (!processColgroupEndTagForInColumnGroup()) {
            ASSERT(isParsingFragmentOrTemplateContents());
            // The spec tells us to drop these characters on the floor.
            buffer.skipLeadingNonWhitespace();
            if (buffer.isEmpty())
                return;
        }
        goto ReprocessBuffer;
    }
    case InsertionMode::AfterBody:
    case InsertionMode::AfterAfterBody: {
        ASSERT(insertionMode() == InsertionMode::AfterBody || insertionMode() == InsertionMode::AfterAfterBody);
        // FIXME: parse error
        setInsertionMode(InsertionMode::InBody);
        goto ReprocessBuffer;
    }
    case InsertionMode::Text: {
        ASSERT(insertionMode() == InsertionMode::Text);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case InsertionMode::InHeadNoscript: {
        ASSERT(insertionMode() == InsertionMode::InHeadNoscript);
        String leadingWhitespace = buffer.takeLeadingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        if (buffer.isEmpty())
            return;
        defaultForInHeadNoscript();
        goto ReprocessBuffer;
    }
    case InsertionMode::InFrameset:
    case InsertionMode::AfterFrameset: {
        ASSERT(insertionMode() == InsertionMode::InFrameset || insertionMode() == InsertionMode::AfterFrameset || insertionMode() == InsertionMode::AfterAfterFrameset);
        // Note: despite its name, this holds the result of takeRemainingWhitespace() on the whole
        // remaining buffer, not only a leading run.
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty())
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    case InsertionMode::InSelectInTable:
    case InsertionMode::InSelect: {
        ASSERT(insertionMode() == InsertionMode::InSelect || insertionMode() == InsertionMode::InSelectInTable);
        m_tree.insertTextNode(buffer.takeRemaining());
        break;
    }
    case InsertionMode::AfterAfterFrameset: {
        String leadingWhitespace = buffer.takeRemainingWhitespace();
        if (!leadingWhitespace.isEmpty()) {
            m_tree.reconstructTheActiveFormattingElements();
            m_tree.insertTextNode(leadingWhitespace, AllWhitespace);
        }
        // FIXME: We should generate a parse error if we skipped over any
        // non-whitespace characters.
        break;
    }
    }
}

// Inserts the remaining buffered characters using the "in body" rules: reconstructs the active
// formatting elements, inserts the text (linkifying phone numbers on iOS when enabled and allowed
// for the current node), and clears m_framesetOk if the text contains anything other than
// whitespace or replacement characters.
void HTMLTreeBuilder::processCharacterBufferForInBody(ExternalCharacterTokenBuffer& buffer)
{
    m_tree.reconstructTheActiveFormattingElements();
    String characters = buffer.takeRemaining();
#if ENABLE(TELEPHONE_NUMBER_DETECTION) && PLATFORM(IOS)
    if (!isParsingFragment() && m_tree.isTelephoneNumberParsingEnabled() && shouldParseTelephoneNumbersInNode(*m_tree.currentNode()) && TelephoneNumberDetector::isSupported())
        linkifyPhoneNumbers(characters);
    else
        m_tree.insertTextNode(characters);
#else
    m_tree.insertTextNode(characters);
#endif

    if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
        m_framesetOk = false;
}

// Handles the end-of-file token for the current insertion mode.
// Cases that `break` out of the switch end by popping every open element; cases that `return`
// have either re-dispatched the token under a different mode or finished handling it themselves.
void HTMLTreeBuilder::processEndOfFile(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::EndOfFile);
    switch (insertionMode()) {
    case InsertionMode::Initial:
        ASSERT(insertionMode() == InsertionMode::Initial);
        defaultForInitial();
        FALLTHROUGH;
    case InsertionMode::BeforeHTML:
        ASSERT(insertionMode() == InsertionMode::BeforeHTML);
        defaultForBeforeHTML();
        FALLTHROUGH;
    case InsertionMode::BeforeHead:
        ASSERT(insertionMode() == InsertionMode::BeforeHead);
        defaultForBeforeHead();
        FALLTHROUGH;
    case InsertionMode::InHead:
        ASSERT(insertionMode() == InsertionMode::InHead);
        defaultForInHead();
        FALLTHROUGH;
    case InsertionMode::AfterHead:
        ASSERT(insertionMode() == InsertionMode::AfterHead);
        defaultForAfterHead();
        FALLTHROUGH;
    case InsertionMode::InBody:
    case InsertionMode::InCell:
    case InsertionMode::InCaption:
    case InsertionMode::InRow:
#if ENABLE(TEMPLATE_ELEMENT)
        ASSERT(insertionMode() == InsertionMode::InBody || insertionMode() == InsertionMode::InCell || insertionMode() == InsertionMode::InCaption || insertionMode() == InsertionMode::InRow || insertionMode() == InsertionMode::TemplateContents);
#else
        ASSERT(insertionMode() != InsertionMode::TemplateContents);
        ASSERT(insertionMode() == InsertionMode::InBody || insertionMode() == InsertionMode::InCell || insertionMode() == InsertionMode::InCaption || insertionMode() == InsertionMode::InRow);
#endif
        notImplemented(); // Emit parse error based on what elements are still open.
#if ENABLE(TEMPLATE_ELEMENT)
        // With template insertion modes still pending, let the template-contents EOF handling run first.
        if (!m_templateInsertionModes.isEmpty())
            if (processEndOfFileForInTemplateContents(token))
                return;
#endif
        break;
    case InsertionMode::AfterBody:
    case InsertionMode::AfterAfterBody:
        ASSERT(insertionMode() == InsertionMode::AfterBody || insertionMode() == InsertionMode::AfterAfterBody);
        break;
    case InsertionMode::InHeadNoscript:
        ASSERT(insertionMode() == InsertionMode::InHeadNoscript);
        // Close the <noscript> with a synthesized end tag, then handle EOF again under the new mode.
        defaultForInHeadNoscript();
        processEndOfFile(token);
        return;
    case InsertionMode::AfterFrameset:
    case InsertionMode::AfterAfterFrameset:
        ASSERT(insertionMode() == InsertionMode::AfterFrameset || insertionMode() == InsertionMode::AfterAfterFrameset);
        break;
    case InsertionMode::InColumnGroup:
        if (m_tree.currentIsRootNode()) {
            ASSERT(isParsingFragment());
            return; // FIXME: Should we break here instead of returning?
        }
#if ENABLE(TEMPLATE_ELEMENT)
        ASSERT(m_tree.currentNode()->hasTagName(colgroupTag) || m_tree.currentNode()->hasTagName(templateTag));
#else
        ASSERT(m_tree.currentNode()->hasTagName(colgroupTag));
#endif
        processColgroupEndTagForInColumnGroup();
        FALLTHROUGH;
    case InsertionMode::InFrameset:
    case InsertionMode::InTable:
    case InsertionMode::InTableBody:
    case InsertionMode::InSelectInTable:
    case InsertionMode::InSelect:
        ASSERT(insertionMode() == InsertionMode::InSelect || insertionMode() == InsertionMode::InSelectInTable || insertionMode() == InsertionMode::InTable || insertionMode() == InsertionMode::InFrameset || insertionMode() == InsertionMode::InTableBody || insertionMode() == InsertionMode::InColumnGroup);
        // Reaching EOF with anything other than the root node current is a parse error.
        if (m_tree.currentNode() != m_tree.openElements()->rootNode())
            parseError(token);

#if ENABLE(TEMPLATE_ELEMENT)
        if (!m_templateInsertionModes.isEmpty())
            if (processEndOfFileForInTemplateContents(token))
                return;
#endif
        break;
    case InsertionMode::InTableText:
        // Flush the pending table characters (which restores the saved mode), then handle EOF again.
        defaultForInTableText();
        processEndOfFile(token);
        return;
    case InsertionMode::Text:
        parseError(token);
        if (m_tree.currentStackItem()->hasTagName(scriptTag))
            notImplemented(); // mark the script element as "already started".
        m_tree.openElements()->pop();
        // Guards against re-entering Text mode, which would make the recursive call below loop.
        ASSERT(m_originalInsertionMode != InsertionMode::Text);
        setInsertionMode(m_originalInsertionMode);
        processEndOfFile(token);
        return;
    case InsertionMode::TemplateContents:
#if ENABLE(TEMPLATE_ELEMENT)
        if (processEndOfFileForInTemplateContents(token))
            return;
        break;
#else
        ASSERT_NOT_REACHED();
#endif
    }
    ASSERT(m_tree.currentNode());
    m_tree.openElements()->popAll();
}

// Default action for InsertionMode::Initial: selects the default compatibility mode and
// moves to InsertionMode::BeforeHTML.
void HTMLTreeBuilder::defaultForInitial()
{
    notImplemented();
    m_tree.setDefaultCompatibilityMode();
    // FIXME: parse error
    setInsertionMode(InsertionMode::BeforeHTML);
}

// Default action for InsertionMode::BeforeHTML: inserts a synthesized <html> start tag and
// moves to InsertionMode::BeforeHead.
void HTMLTreeBuilder::defaultForBeforeHTML()
{
    AtomicHTMLToken startHTML(HTMLToken::StartTag, htmlTag.localName());
    m_tree.insertHTMLHtmlStartTagBeforeHTML(&startHTML);
    setInsertionMode(InsertionMode::BeforeHead);
}

// Default action for InsertionMode::BeforeHead: processes a synthesized <head> start tag.
void HTMLTreeBuilder::defaultForBeforeHead()
{
    AtomicHTMLToken startHead(HTMLToken::StartTag, headTag.localName());
    processStartTag(&startHead);
}

// Default action for InsertionMode::InHead: processes a synthesized </head> end tag.
void HTMLTreeBuilder::defaultForInHead()
{
    AtomicHTMLToken endHead(HTMLToken::EndTag, headTag.localName());
    processEndTag(&endHead);
}

// Default action for InsertionMode::InHeadNoscript: processes a synthesized </noscript> end tag.
void HTMLTreeBuilder::defaultForInHeadNoscript()
{
    AtomicHTMLToken endNoscript(HTMLToken::EndTag, noscriptTag.localName());
    processEndTag(&endNoscript);
}

// Default action for InsertionMode::AfterHead: processes a synthesized <body> start tag and
// then sets m_framesetOk back to true.
void HTMLTreeBuilder::defaultForAfterHead()
{
    AtomicHTMLToken startBody(HTMLToken::StartTag, bodyTag.localName());
    processStartTag(&startBody);
    m_framesetOk = true;
}

// Flushes the characters accumulated while in InsertionMode::InTableText and restores the
// insertion mode saved in m_originalInsertionMode. Text containing any non-whitespace is
// foster-parented (after reconstructing the active formatting elements) and clears m_framesetOk;
// all-whitespace text is inserted at the normal insertion point.
void HTMLTreeBuilder::defaultForInTableText()
{
    String characters = m_pendingTableCharacters.toString();
    m_pendingTableCharacters.clear();
    if (!isAllWhitespace(characters)) {
        // FIXME: parse error
        HTMLConstructionSite::RedirectToFosterParentGuard redirecter(m_tree);
        m_tree.reconstructTheActiveFormattingElements();
        m_tree.insertTextNode(characters, NotAllWhitespace);
        m_framesetOk = false;
        setInsertionMode(m_originalInsertionMode);
        return;
    }
    m_tree.insertTextNode(characters);
    setInsertionMode(m_originalInsertionMode);
}

// Applies the "in head" start tag rules to the token.
// Returns true when the token was handled here and false when none of the rules below matched,
// leaving the token for the caller to handle.
bool HTMLTreeBuilder::processStartTagForInHead(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    if (token->name() == htmlTag) {
        processHtmlStartTagForInBody(token);
        return true;
    }
    if (token->name() == baseTag
        || token->name() == basefontTag
        || token->name() == bgsoundTag
        || token->name() == commandTag
        || token->name() == linkTag
        || token->name() == metaTag) {
        m_tree.insertSelfClosingHTMLElement(token);
        // Note: The custom processing for the <meta> tag is done in HTMLMetaElement::process().
        return true;
    }
    if (token->name() == titleTag) {
        processGenericRCDATAStartTag(token);
        return true;
    }
    if (token->name() == noscriptTag) {
        // With scripting enabled <noscript> content is treated as raw text; otherwise it is
        // parsed as markup in InsertionMode::InHeadNoscript.
        if (m_options.scriptEnabled) {
            processGenericRawTextStartTag(token);
            return true;
        }
        m_tree.insertHTMLElement(token);
        setInsertionMode(InsertionMode::InHeadNoscript);
        return true;
    }
    if (token->name() == noframesTag || token->name() == styleTag) {
        processGenericRawTextStartTag(token);
        return true;
    }
    if (token->name() == scriptTag) {
        processScriptStartTag(token);
        // Pre-HTML5 quirk: a self-closing <script/> is closed immediately with a synthesized end tag.
        if (m_options.usePreHTML5ParserQuirks && token->selfClosing())
            processFakeEndTag(scriptTag);
        return true;
    }
#if ENABLE(TEMPLATE_ELEMENT)
    if (token->name() == templateTag) {
        processTemplateStartTag(token);
        return true;
    }
#endif
    if (token->name() == headTag) {
        parseError(token);
        return true;
    }
    return false;
}

// Inserts the element, switches the tokenizer (if any) to RCDATAState, and enters
// InsertionMode::Text after saving the current mode in m_originalInsertionMode.
void HTMLTreeBuilder::processGenericRCDATAStartTag(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    m_tree.insertHTMLElement(token);
    if (m_parser.tokenizer())
        m_parser.tokenizer()->setState(HTMLTokenizer::RCDATAState);
    m_originalInsertionMode = m_insertionMode;
    setInsertionMode(InsertionMode::Text);
}

// Inserts the element, switches the tokenizer (if any) to RAWTEXTState, and enters
// InsertionMode::Text after saving the current mode in m_originalInsertionMode.
void HTMLTreeBuilder::processGenericRawTextStartTag(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    m_tree.insertHTMLElement(token);
    if (m_parser.tokenizer())
        m_parser.tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
    m_originalInsertionMode = m_insertionMode;
    setInsertionMode(InsertionMode::Text);
}

// Inserts a <script> element, switches the tokenizer (if any) to ScriptDataState, records the
// parser's current text position as the script's start position, and enters InsertionMode::Text
// after saving the current mode in m_originalInsertionMode.
void HTMLTreeBuilder::processScriptStartTag(AtomicHTMLToken* token)
{
    ASSERT(token->type() == HTMLToken::StartTag);
    m_tree.insertScriptElement(token);
    if (m_parser.tokenizer())
        m_parser.tokenizer()->setState(HTMLTokenizer::ScriptDataState);
    m_originalInsertionMode = m_insertionMode;

    TextPosition position = m_parser.textPosition();

    m_scriptToProcessStartPosition = position;

    setInsertionMode(InsertionMode::Text);
}

// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#tree-construction
// Returns true when the token must be handled by processTokenInForeignContent() rather than by
// the HTML insertion-mode rules: the stack is non-empty, the current item is outside the HTML
// namespace, and none of the integration-point or end-of-file exceptions below apply.
bool HTMLTreeBuilder::shouldProcessTokenInForeignContent(AtomicHTMLToken* token)
{
    if (m_tree.isEmpty())
        return false;
    HTMLStackItem* item = m_tree.currentStackItem();
    if (item->isInHTMLNamespace())
        return false;
    if (HTMLElementStack::isMathMLTextIntegrationPoint(item)) {
        // At a MathML text integration point, start tags other than mglyph/malignmark and all
        // character tokens follow the HTML rules.
        if (token->type() == HTMLToken::StartTag
            && token->name() != MathMLNames::mglyphTag
            && token->name() != MathMLNames::malignmarkTag)
            return false;
        if (token->type() == HTMLToken::Character)
            return false;
    }
    if (item->hasTagName(MathMLNames::annotation_xmlTag)
        && token->type() == HTMLToken::StartTag
        && token->name() == SVGNames::svgTag)
        return false;
    if (HTMLElementStack::isHTMLIntegrationPoint(item)) {
        if (token->type() == HTMLToken::StartTag)
            return false;
        if (token->type() == HTMLToken::Character)
            return false;
    }
    if (token->type() == HTMLToken::EndOfFile)
        return false;
    return true;
}

// Handles a token while the current node is in a foreign (non-HTML) namespace.
// EndOfFile and Uninitialized tokens are not expected here.
void HTMLTreeBuilder::processTokenInForeignContent(AtomicHTMLToken* token)
{
    switch (token->type()) {
    case HTMLToken::Uninitialized:
        ASSERT_NOT_REACHED();
        break;
    case HTMLToken::DOCTYPE:
        parseError(token);
        break;
    case HTMLToken::StartTag: {
        // These HTML start tags break out of foreign content: report a parse error, pop back to
        // the foreign content scope marker and reprocess the token as a regular start tag.
        if (token->name() == bTag
            || token->name() == bigTag
            || token->name() == blockquoteTag
            || token->name() == bodyTag
            || token->name() == brTag
            || token->name() == centerTag
            || token->name() == codeTag
            || token->name() == ddTag
            || token->name() == divTag
            || token->name() == dlTag
            || token->name() == dtTag
            || token->name() == emTag
            || token->name() == embedTag
            || isNumberedHeaderTag(token->name())
            || token->name() == headTag
            || token->name() == hrTag
            || token->name() == iTag
            || token->name() == imgTag
            || token->name() == liTag
            || token->name() == listingTag
            || token->name() == menuTag
            || token->name() == metaTag
            || token->name() == nobrTag
            || token->name() == olTag
            || token->name() == pTag
            || token->name() == preTag
            || token->name() == rubyTag
            || token->name() == sTag
            || token->name() == smallTag
            || token->name() == spanTag
            || token->name() == strongTag
            || token->name() == strikeTag
            || token->name() == subTag
            || token->name() == supTag
            || token->name() == tableTag
            || token->name() == ttTag
            || token->name() == uTag
            || token->name() == ulTag
            || token->name() == varTag
            || (token->name() == fontTag && (token->getAttributeItem(colorAttr) || token->getAttributeItem(faceAttr) || token->getAttributeItem(sizeAttr)))) {
            parseError(token);
            m_tree.openElements()->popUntilForeignContentScopeMarker();
            processStartTag(token);
            return;
        }
        // Otherwise insert the element in the current node's namespace after applying the
        // namespace-specific name and attribute adjustments.
        const AtomicString& currentNamespace = m_tree.currentStackItem()->namespaceURI();
        if (currentNamespace == MathMLNames::mathmlNamespaceURI)
            adjustMathMLAttributes(*token);
        if (currentNamespace == SVGNames::svgNamespaceURI) {
            adjustSVGTagNameCase(*token);
            adjustSVGAttributes(*token);
        }
        adjustForeignAttributes(*token);
        m_tree.insertForeignElement(token, currentNamespace);
        break;
    }
    case HTMLToken::EndTag: {
        if (m_tree.currentStackItem()->namespaceURI() == SVGNames::svgNamespaceURI)
            adjustSVGTagNameCase(*token);

        if (token->name() == SVGNames::scriptTag && m_tree.currentStackItem()->hasTagName(SVGNames::scriptTag)) {
            if (scriptingContentIsAllowed(m_tree.parserContentPolicy()))
                m_scriptToProcess = m_tree.currentElement();
            m_tree.openElements()->pop();
            return;
        }
        if (!m_tree.currentStackItem()->isInHTMLNamespace()) {
            // FIXME: This code just wants an Element* iterator, instead of an ElementRecord*
            HTMLElementStack::ElementRecord* nodeRecord = m_tree.openElements()->topRecord();
            if (!nodeRecord->stackItem()->hasLocalName(token->name()))
                parseError(token);
            // Walk down the stack looking for an element with a matching local name; stop at the
            // first element in the HTML namespace and fall back to the HTML rules below.
            while (1) {
                if (nodeRecord->stackItem()->hasLocalName(token->name())) {
                    m_tree.openElements()->popUntilPopped(nodeRecord->element());
                    return;
                }
                nodeRecord = nodeRecord->next();

                if (nodeRecord->stackItem()->isInHTMLNamespace())
                    break;
            }
        }
        // Otherwise, process the token according to the rules given in the section corresponding to the current insertion mode in HTML content.
        processEndTag(token);
        break;
    }
    case HTMLToken::Comment:
        m_tree.insertComment(token);
        return;
    case HTMLToken::Character: {
        String characters = String(token->characters(), token->charactersLength());
        m_tree.insertTextNode(characters);
        if (m_framesetOk && !isAllWhitespaceOrReplacementCharacters(characters))
            m_framesetOk = false;
        break;
    }
    case HTMLToken::EndOfFile:
        ASSERT_NOT_REACHED();
        break;
    }
}

// Notifies the construction site that parsing has finished. Does nothing when parsing a fragment.
void HTMLTreeBuilder::finished()
{
    if (isParsingFragment())
        return;

#if ENABLE(TEMPLATE_ELEMENT)
    ASSERT(m_templateInsertionModes.isEmpty());
#endif

    ASSERT(m_isAttached);
    // Warning, this may detach the parser. Do not do anything else after this.
    m_tree.finishedParsing();
}

// Parse-error hook invoked throughout the tree builder. The body is intentionally empty here:
// parse errors are currently not recorded or reported.
void HTMLTreeBuilder::parseError(AtomicHTMLToken*)
{
}

} // namespace WebCore