1/*
2 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
3 * Copyright (C) 2011 Apple Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL GOOGLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "HTMLTreeBuilder.h"
29
30#include "AtomicHTMLToken.h"
31#include "Comment.h"
32#include "DocumentFragment.h"
33#include "DocumentType.h"
34#include "Element.h"
35#include "Frame.h"
36#include "FrameLoader.h"
37#include "FrameLoaderClient.h"
38#include "HTMLDocument.h"
39#include "HTMLElementFactory.h"
40#include "HTMLFormElement.h"
41#include "HTMLHtmlElement.h"
42#include "HTMLNames.h"
43#include "HTMLParserIdioms.h"
44#include "HTMLPlugInElement.h"
45#include "HTMLScriptElement.h"
46#include "HTMLStackItem.h"
47#include "HTMLTemplateElement.h"
48#include "HTMLToken.h"
49#include "HTMLTokenizer.h"
50#include "LocalizedStrings.h"
51#include "NotImplemented.h"
52#include "Settings.h"
53#include "Text.h"
54
55namespace WebCore {
56
57using namespace HTMLNames;
58
59static inline void setAttributes(Element* element, AtomicHTMLToken* token, ParserContentPolicy parserContentPolicy)
60{
61    if (!scriptingContentIsAllowed(parserContentPolicy))
62        element->stripScriptingAttributes(token->attributes());
63    element->parserSetAttributes(token->attributes());
64}
65
66static bool hasImpliedEndTag(const HTMLStackItem* item)
67{
68    return item->hasTagName(ddTag)
69        || item->hasTagName(dtTag)
70        || item->hasTagName(liTag)
71        || item->hasTagName(optionTag)
72        || item->hasTagName(optgroupTag)
73        || item->hasTagName(pTag)
74        || item->hasTagName(rpTag)
75        || item->hasTagName(rtTag);
76}
77
78static bool shouldUseLengthLimit(const ContainerNode* node)
79{
80    return !node->hasTagName(scriptTag)
81        && !node->hasTagName(styleTag)
82        && !node->hasTagName(SVGNames::scriptTag);
83}
84
85static inline bool isAllWhitespace(const String& string)
86{
87    return string.isAllSpecialCharacters<isHTMLSpace>();
88}
89// The |lazyAttach| parameter to this function exists for historical reasons.
90// There used to be two code paths, one that used lazyAttach and one that
91// didn't. We should make the two code paths consistent and either use
92// lazyAttach or non-lazyAttach, but we wanted to make that change separately.
93static inline void insert(HTMLConstructionSiteTask& task, bool lazyAttach)
94{
95#if ENABLE(TEMPLATE_ELEMENT)
96    if (task.parent->hasTagName(templateTag))
97        task.parent = toHTMLTemplateElement(task.parent.get())->content();
98#endif
99
100    if (ContainerNode* parent = task.child->parentNode())
101        parent->parserRemoveChild(task.child.get());
102
103    if (task.nextChild)
104        task.parent->parserInsertBefore(task.child.get(), task.nextChild.get());
105    else
106        task.parent->parserAppendChild(task.child.get());
107
108    // JavaScript run from beforeload (or DOM Mutation or event handlers)
109    // might have removed the child, in which case we should not attach it.
110
111    if (task.child->parentNode() && task.parent->attached() && !task.child->attached()) {
112        if (lazyAttach)
113            task.child->lazyAttach();
114        else
115            task.child->attach();
116    }
117}
118
119static inline void executeInsertTask(HTMLConstructionSiteTask& task)
120{
121    ASSERT(task.operation == HTMLConstructionSiteTask::Insert);
122
123    insert(task, false);
124
125    task.child->beginParsingChildren();
126
127    if (task.selfClosing)
128        task.child->finishParsingChildren();
129}
130
131static inline void executeReparentTask(HTMLConstructionSiteTask& task)
132{
133    ASSERT(task.operation == HTMLConstructionSiteTask::Reparent);
134
135    if (ContainerNode* parent = task.child->parentNode())
136        parent->parserRemoveChild(task.child.get());
137
138    task.parent->parserAppendChild(task.child);
139
140    if (task.child->parentElement()->attached() && !task.child->attached())
141        task.child->lazyAttach();
142}
143
144static inline void executeInsertAlreadyParsedChildTask(HTMLConstructionSiteTask& task)
145{
146    ASSERT(task.operation == HTMLConstructionSiteTask::InsertAlreadyParsedChild);
147
148    insert(task, true);
149}
150
151static inline void executeTakeAllChildrenTask(HTMLConstructionSiteTask& task)
152{
153    ASSERT(task.operation == HTMLConstructionSiteTask::TakeAllChildren);
154
155    task.parent->takeAllChildrenFrom(task.oldParent());
156    // Notice that we don't need to manually attach the moved children
157    // because takeAllChildrenFrom does that work for us.
158}
159
160static inline void executeTask(HTMLConstructionSiteTask& task)
161{
162    switch (task.operation) {
163    case HTMLConstructionSiteTask::Insert:
164        executeInsertTask(task);
165        return;
166    // All the cases below this point are only used by the adoption agency.
167    case HTMLConstructionSiteTask::InsertAlreadyParsedChild:
168        executeInsertAlreadyParsedChildTask(task);
169        return;
170    case HTMLConstructionSiteTask::Reparent:
171        executeReparentTask(task);
172        return;
173    case HTMLConstructionSiteTask::TakeAllChildren:
174        executeTakeAllChildrenTask(task);
175        return;
176    }
177    ASSERT_NOT_REACHED();
178}
179
180void HTMLConstructionSite::attachLater(ContainerNode* parent, PassRefPtr<Node> prpChild, bool selfClosing)
181{
182    ASSERT(scriptingContentIsAllowed(m_parserContentPolicy) || !prpChild.get()->isElementNode() || !toScriptElementIfPossible(toElement(prpChild.get())));
183    ASSERT(pluginContentIsAllowed(m_parserContentPolicy) || !prpChild->isPluginElement());
184
185    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
186    task.parent = parent;
187    task.child = prpChild;
188    task.selfClosing = selfClosing;
189
190    if (shouldFosterParent()) {
191        fosterParent(task.child);
192        return;
193    }
194
195    // Add as a sibling of the parent if we have reached the maximum depth allowed.
196    if (m_openElements.stackDepth() > m_maximumDOMTreeDepth && task.parent->parentNode())
197        task.parent = task.parent->parentNode();
198
199    ASSERT(task.parent);
200    m_taskQueue.append(task);
201}
202
203void HTMLConstructionSite::executeQueuedTasks()
204{
205    const size_t size = m_taskQueue.size();
206    if (!size)
207        return;
208
209    // Copy the task queue into a local variable in case executeTask
210    // re-enters the parser.
211    TaskQueue queue = std::move(m_taskQueue);
212
213    for (size_t i = 0; i < size; ++i)
214        executeTask(queue[i]);
215
216    // We might be detached now.
217}
218
219HTMLConstructionSite::HTMLConstructionSite(Document* document, ParserContentPolicy parserContentPolicy, unsigned maximumDOMTreeDepth)
220    : m_document(document)
221    , m_attachmentRoot(document)
222    , m_parserContentPolicy(parserContentPolicy)
223    , m_isParsingFragment(false)
224    , m_redirectAttachToFosterParent(false)
225    , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
226    , m_inQuirksMode(document->inQuirksMode())
227{
228    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
229}
230
231HTMLConstructionSite::HTMLConstructionSite(DocumentFragment* fragment, ParserContentPolicy parserContentPolicy, unsigned maximumDOMTreeDepth)
232    : m_document(fragment->document())
233    , m_attachmentRoot(fragment)
234    , m_parserContentPolicy(parserContentPolicy)
235    , m_isParsingFragment(true)
236    , m_redirectAttachToFosterParent(false)
237    , m_maximumDOMTreeDepth(maximumDOMTreeDepth)
238    , m_inQuirksMode(fragment->document()->inQuirksMode())
239{
240    ASSERT(m_document->isHTMLDocument() || m_document->isXHTMLDocument());
241}
242
243HTMLConstructionSite::~HTMLConstructionSite()
244{
245}
246
247void HTMLConstructionSite::detach()
248{
249    m_document = 0;
250    m_attachmentRoot = 0;
251}
252
253void HTMLConstructionSite::setForm(HTMLFormElement* form)
254{
255    // This method should only be needed for HTMLTreeBuilder in the fragment case.
256    ASSERT(!m_form);
257    m_form = form;
258}
259
260PassRefPtr<HTMLFormElement> HTMLConstructionSite::takeForm()
261{
262    return m_form.release();
263}
264
265void HTMLConstructionSite::dispatchDocumentElementAvailableIfNeeded()
266{
267    ASSERT(m_document);
268    if (m_document->frame() && !m_isParsingFragment)
269        m_document->frame()->loader()->dispatchDocumentElementAvailable();
270}
271
272void HTMLConstructionSite::insertHTMLHtmlStartTagBeforeHTML(AtomicHTMLToken* token)
273{
274    RefPtr<HTMLHtmlElement> element = HTMLHtmlElement::create(m_document);
275    setAttributes(element.get(), token, m_parserContentPolicy);
276    attachLater(m_attachmentRoot, element);
277    m_openElements.pushHTMLHtmlElement(HTMLStackItem::create(element, token));
278
279    executeQueuedTasks();
280    element->insertedByParser();
281    dispatchDocumentElementAvailableIfNeeded();
282}
283
284void HTMLConstructionSite::mergeAttributesFromTokenIntoElement(AtomicHTMLToken* token, Element* element)
285{
286    if (token->attributes().isEmpty())
287        return;
288
289    for (unsigned i = 0; i < token->attributes().size(); ++i) {
290        const Attribute& tokenAttribute = token->attributes().at(i);
291        if (!element->elementData() || !element->getAttributeItem(tokenAttribute.name()))
292            element->setAttribute(tokenAttribute.name(), tokenAttribute.value());
293    }
294}
295
296void HTMLConstructionSite::insertHTMLHtmlStartTagInBody(AtomicHTMLToken* token)
297{
298    // Fragments do not have a root HTML element, so any additional HTML elements
299    // encountered during fragment parsing should be ignored.
300    if (m_isParsingFragment)
301        return;
302
303    mergeAttributesFromTokenIntoElement(token, m_openElements.htmlElement());
304}
305
306void HTMLConstructionSite::insertHTMLBodyStartTagInBody(AtomicHTMLToken* token)
307{
308    mergeAttributesFromTokenIntoElement(token, m_openElements.bodyElement());
309}
310
311void HTMLConstructionSite::setDefaultCompatibilityMode()
312{
313    if (m_isParsingFragment)
314        return;
315    if (m_document->isSrcdocDocument())
316        return;
317    setCompatibilityMode(Document::QuirksMode);
318}
319
320void HTMLConstructionSite::setCompatibilityMode(Document::CompatibilityMode mode)
321{
322    m_inQuirksMode = (mode == Document::QuirksMode);
323    m_document->setCompatibilityMode(mode);
324}
325
326void HTMLConstructionSite::setCompatibilityModeFromDoctype(const String& name, const String& publicId, const String& systemId)
327{
328    // There are three possible compatibility modes:
329    // Quirks - quirks mode emulates WinIE and NS4. CSS parsing is also relaxed in this mode, e.g., unit types can
330    // be omitted from numbers.
331    // Limited Quirks - This mode is identical to no-quirks mode except for its treatment of line-height in the inline box model.
332    // No Quirks - no quirks apply. Web pages will obey the specifications to the letter.
333
334    // Check for Quirks Mode.
335    if (name != "html"
336        || publicId.startsWith("+//Silmaril//dtd html Pro v0r11 19970101//", false)
337        || publicId.startsWith("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//", false)
338        || publicId.startsWith("-//AS//DTD HTML 3.0 asWedit + extensions//", false)
339        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 1//", false)
340        || publicId.startsWith("-//IETF//DTD HTML 2.0 Level 2//", false)
341        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 1//", false)
342        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict Level 2//", false)
343        || publicId.startsWith("-//IETF//DTD HTML 2.0 Strict//", false)
344        || publicId.startsWith("-//IETF//DTD HTML 2.0//", false)
345        || publicId.startsWith("-//IETF//DTD HTML 2.1E//", false)
346        || publicId.startsWith("-//IETF//DTD HTML 3.0//", false)
347        || publicId.startsWith("-//IETF//DTD HTML 3.2 Final//", false)
348        || publicId.startsWith("-//IETF//DTD HTML 3.2//", false)
349        || publicId.startsWith("-//IETF//DTD HTML 3//", false)
350        || publicId.startsWith("-//IETF//DTD HTML Level 0//", false)
351        || publicId.startsWith("-//IETF//DTD HTML Level 1//", false)
352        || publicId.startsWith("-//IETF//DTD HTML Level 2//", false)
353        || publicId.startsWith("-//IETF//DTD HTML Level 3//", false)
354        || publicId.startsWith("-//IETF//DTD HTML Strict Level 0//", false)
355        || publicId.startsWith("-//IETF//DTD HTML Strict Level 1//", false)
356        || publicId.startsWith("-//IETF//DTD HTML Strict Level 2//", false)
357        || publicId.startsWith("-//IETF//DTD HTML Strict Level 3//", false)
358        || publicId.startsWith("-//IETF//DTD HTML Strict//", false)
359        || publicId.startsWith("-//IETF//DTD HTML//", false)
360        || publicId.startsWith("-//Metrius//DTD Metrius Presentational//", false)
361        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//", false)
362        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 HTML//", false)
363        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 2.0 Tables//", false)
364        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//", false)
365        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 HTML//", false)
366        || publicId.startsWith("-//Microsoft//DTD Internet Explorer 3.0 Tables//", false)
367        || publicId.startsWith("-//Netscape Comm. Corp.//DTD HTML//", false)
368        || publicId.startsWith("-//Netscape Comm. Corp.//DTD Strict HTML//", false)
369        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML 2.0//", false)
370        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended 1.0//", false)
371        || publicId.startsWith("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//", false)
372        || publicId.startsWith("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//", false)
373        || publicId.startsWith("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//", false)
374        || publicId.startsWith("-//Spyglass//DTD HTML 2.0 Extended//", false)
375        || publicId.startsWith("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//", false)
376        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava HTML//", false)
377        || publicId.startsWith("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//", false)
378        || publicId.startsWith("-//W3C//DTD HTML 3 1995-03-24//", false)
379        || publicId.startsWith("-//W3C//DTD HTML 3.2 Draft//", false)
380        || publicId.startsWith("-//W3C//DTD HTML 3.2 Final//", false)
381        || publicId.startsWith("-//W3C//DTD HTML 3.2//", false)
382        || publicId.startsWith("-//W3C//DTD HTML 3.2S Draft//", false)
383        || publicId.startsWith("-//W3C//DTD HTML 4.0 Frameset//", false)
384        || publicId.startsWith("-//W3C//DTD HTML 4.0 Transitional//", false)
385        || publicId.startsWith("-//W3C//DTD HTML Experimental 19960712//", false)
386        || publicId.startsWith("-//W3C//DTD HTML Experimental 970421//", false)
387        || publicId.startsWith("-//W3C//DTD W3 HTML//", false)
388        || publicId.startsWith("-//W3O//DTD W3 HTML 3.0//", false)
389        || equalIgnoringCase(publicId, "-//W3O//DTD W3 HTML Strict 3.0//EN//")
390        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML 2.0//", false)
391        || publicId.startsWith("-//WebTechs//DTD Mozilla HTML//", false)
392        || equalIgnoringCase(publicId, "-/W3C/DTD HTML 4.0 Transitional/EN")
393        || equalIgnoringCase(publicId, "HTML")
394        || equalIgnoringCase(systemId, "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")
395        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
396        || (systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
397        setCompatibilityMode(Document::QuirksMode);
398        return;
399    }
400
401    // Check for Limited Quirks Mode.
402    if (publicId.startsWith("-//W3C//DTD XHTML 1.0 Frameset//", false)
403        || publicId.startsWith("-//W3C//DTD XHTML 1.0 Transitional//", false)
404        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Frameset//", false))
405        || (!systemId.isEmpty() && publicId.startsWith("-//W3C//DTD HTML 4.01 Transitional//", false))) {
406        setCompatibilityMode(Document::LimitedQuirksMode);
407        return;
408    }
409
410    // Otherwise we are No Quirks Mode.
411    setCompatibilityMode(Document::NoQuirksMode);
412}
413
414void HTMLConstructionSite::finishedParsing()
415{
416    m_document->finishedParsing();
417}
418
419void HTMLConstructionSite::insertDoctype(AtomicHTMLToken* token)
420{
421    ASSERT(token->type() == HTMLToken::DOCTYPE);
422
423    const String& publicId = StringImpl::create8BitIfPossible(token->publicIdentifier());
424    const String& systemId = StringImpl::create8BitIfPossible(token->systemIdentifier());
425    RefPtr<DocumentType> doctype = DocumentType::create(m_document, token->name(), publicId, systemId);
426    attachLater(m_attachmentRoot, doctype.release());
427
428    // DOCTYPE nodes are only processed when parsing fragments w/o contextElements, which
429    // never occurs.  However, if we ever chose to support such, this code is subtly wrong,
430    // because context-less fragments can determine their own quirks mode, and thus change
431    // parsing rules (like <p> inside <table>).  For now we ASSERT that we never hit this code
432    // in a fragment, as changing the owning document's compatibility mode would be wrong.
433    ASSERT(!m_isParsingFragment);
434    if (m_isParsingFragment)
435        return;
436
437    if (token->forceQuirks())
438        setCompatibilityMode(Document::QuirksMode);
439    else {
440        setCompatibilityModeFromDoctype(token->name(), publicId, systemId);
441    }
442}
443
444void HTMLConstructionSite::insertComment(AtomicHTMLToken* token)
445{
446    ASSERT(token->type() == HTMLToken::Comment);
447    attachLater(currentNode(), Comment::create(ownerDocumentForCurrentNode(), token->comment()));
448}
449
450void HTMLConstructionSite::insertCommentOnDocument(AtomicHTMLToken* token)
451{
452    ASSERT(token->type() == HTMLToken::Comment);
453    attachLater(m_attachmentRoot, Comment::create(m_document, token->comment()));
454}
455
456void HTMLConstructionSite::insertCommentOnHTMLHtmlElement(AtomicHTMLToken* token)
457{
458    ASSERT(token->type() == HTMLToken::Comment);
459    ContainerNode* parent = m_openElements.rootNode();
460    attachLater(parent, Comment::create(parent->document(), token->comment()));
461}
462
463void HTMLConstructionSite::insertHTMLHeadElement(AtomicHTMLToken* token)
464{
465    ASSERT(!shouldFosterParent());
466    m_head = HTMLStackItem::create(createHTMLElement(token), token);
467    attachLater(currentNode(), m_head->element());
468    m_openElements.pushHTMLHeadElement(m_head);
469}
470
471void HTMLConstructionSite::insertHTMLBodyElement(AtomicHTMLToken* token)
472{
473    ASSERT(!shouldFosterParent());
474    RefPtr<Element> body = createHTMLElement(token);
475    attachLater(currentNode(), body);
476    m_openElements.pushHTMLBodyElement(HTMLStackItem::create(body.release(), token));
477    if (Frame* frame = m_document->frame())
478        frame->loader()->client()->dispatchWillInsertBody();
479}
480
481void HTMLConstructionSite::insertHTMLFormElement(AtomicHTMLToken* token, bool isDemoted)
482{
483    RefPtr<Element> element = createHTMLElement(token);
484    ASSERT(element->hasTagName(formTag));
485    m_form = static_pointer_cast<HTMLFormElement>(element.release());
486    m_form->setDemoted(isDemoted);
487    attachLater(currentNode(), m_form);
488    m_openElements.push(HTMLStackItem::create(m_form, token));
489}
490
491void HTMLConstructionSite::insertHTMLElement(AtomicHTMLToken* token)
492{
493    RefPtr<Element> element = createHTMLElement(token);
494    attachLater(currentNode(), element);
495    m_openElements.push(HTMLStackItem::create(element.release(), token));
496}
497
498void HTMLConstructionSite::insertSelfClosingHTMLElement(AtomicHTMLToken* token)
499{
500    ASSERT(token->type() == HTMLToken::StartTag);
501    // Normally HTMLElementStack is responsible for calling finishParsingChildren,
502    // but self-closing elements are never in the element stack so the stack
503    // doesn't get a chance to tell them that we're done parsing their children.
504    attachLater(currentNode(), createHTMLElement(token), true);
505    // FIXME: Do we want to acknowledge the token's self-closing flag?
506    // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#acknowledge-self-closing-flag
507}
508
509void HTMLConstructionSite::insertFormattingElement(AtomicHTMLToken* token)
510{
511    // http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#the-stack-of-open-elements
512    // Possible active formatting elements include:
513    // a, b, big, code, em, font, i, nobr, s, small, strike, strong, tt, and u.
514    insertHTMLElement(token);
515    m_activeFormattingElements.append(currentElementRecord()->stackItem());
516}
517
518void HTMLConstructionSite::insertScriptElement(AtomicHTMLToken* token)
519{
520    // http://www.whatwg.org/specs/web-apps/current-work/multipage/scripting-1.html#already-started
521    // http://html5.org/specs/dom-parsing.html#dom-range-createcontextualfragment
522    // For createContextualFragment, the specifications say to mark it parser-inserted and already-started and later unmark them.
523    // However, we short circuit that logic to avoid the subtree traversal to find script elements since scripts can never see
524    // those flags or effects thereof.
525    const bool parserInserted = m_parserContentPolicy != AllowScriptingContentAndDoNotMarkAlreadyStarted;
526    const bool alreadyStarted = m_isParsingFragment && parserInserted;
527    RefPtr<HTMLScriptElement> element = HTMLScriptElement::create(scriptTag, ownerDocumentForCurrentNode(), parserInserted, alreadyStarted);
528    setAttributes(element.get(), token, m_parserContentPolicy);
529    if (scriptingContentIsAllowed(m_parserContentPolicy))
530        attachLater(currentNode(), element);
531    m_openElements.push(HTMLStackItem::create(element.release(), token));
532}
533
534void HTMLConstructionSite::insertForeignElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
535{
536    ASSERT(token->type() == HTMLToken::StartTag);
537    notImplemented(); // parseError when xmlns or xmlns:xlink are wrong.
538
539    RefPtr<Element> element = createElement(token, namespaceURI);
540    if (scriptingContentIsAllowed(m_parserContentPolicy) || !toScriptElementIfPossible(element.get()))
541        attachLater(currentNode(), element, token->selfClosing());
542    if (!token->selfClosing())
543        m_openElements.push(HTMLStackItem::create(element.release(), token, namespaceURI));
544}
545
546void HTMLConstructionSite::insertTextNode(const String& characters, WhitespaceMode whitespaceMode)
547{
548    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
549    task.parent = currentNode();
550
551    if (shouldFosterParent())
552        findFosterSite(task);
553
554#if ENABLE(TEMPLATE_ELEMENT)
555    if (task.parent->hasTagName(templateTag))
556        task.parent = toHTMLTemplateElement(task.parent.get())->content();
557#endif
558
559    // Strings composed entirely of whitespace are likely to be repeated.
560    // Turn them into AtomicString so we share a single string for each.
561    bool shouldUseAtomicString = whitespaceMode == AllWhitespace
562        || (whitespaceMode == WhitespaceUnknown && isAllWhitespace(characters));
563
564    unsigned currentPosition = 0;
565    unsigned lengthLimit = shouldUseLengthLimit(task.parent.get()) ? Text::defaultLengthLimit : std::numeric_limits<unsigned>::max();
566
567    // FIXME: Splitting text nodes into smaller chunks contradicts HTML5 spec, but is currently necessary
568    // for performance, see <https://bugs.webkit.org/show_bug.cgi?id=55898>.
569
570    Node* previousChild = task.nextChild ? task.nextChild->previousSibling() : task.parent->lastChild();
571    if (previousChild && previousChild->isTextNode()) {
572        // FIXME: We're only supposed to append to this text node if it
573        // was the last text node inserted by the parser.
574        CharacterData* textNode = static_cast<CharacterData*>(previousChild);
575        currentPosition = textNode->parserAppendData(characters, 0, lengthLimit);
576    }
577
578    while (currentPosition < characters.length()) {
579        RefPtr<Text> textNode = Text::createWithLengthLimit(task.parent->document(), shouldUseAtomicString ? AtomicString(characters).string() : characters, currentPosition, lengthLimit);
580        // If we have a whole string of unbreakable characters the above could lead to an infinite loop. Exceeding the length limit is the lesser evil.
581        if (!textNode->length()) {
582            String substring = characters.substring(currentPosition);
583            textNode = Text::create(task.parent->document(), shouldUseAtomicString ? AtomicString(substring).string() : substring);
584        }
585
586        currentPosition += textNode->length();
587        ASSERT(currentPosition <= characters.length());
588        task.child = textNode.release();
589
590        executeTask(task);
591    }
592}
593
594void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord& newParent, HTMLElementStack::ElementRecord& child)
595{
596    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
597    task.parent = newParent.node();
598    task.child = child.element();
599    m_taskQueue.append(task);
600}
601
602void HTMLConstructionSite::reparent(HTMLElementStack::ElementRecord& newParent, HTMLStackItem& child)
603{
604    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Reparent);
605    task.parent = newParent.node();
606    task.child = child.element();
607    m_taskQueue.append(task);
608}
609
610void HTMLConstructionSite::insertAlreadyParsedChild(HTMLStackItem& newParent, HTMLElementStack::ElementRecord& child)
611{
612    if (newParent.causesFosterParenting()) {
613        fosterParent(child.element());
614        return;
615    }
616
617    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::InsertAlreadyParsedChild);
618    task.parent = newParent.node();
619    task.child = child.element();
620    m_taskQueue.append(task);
621}
622
623void HTMLConstructionSite::takeAllChildren(HTMLStackItem& newParent, HTMLElementStack::ElementRecord& oldParent)
624{
625    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::TakeAllChildren);
626    task.parent = newParent.node();
627    task.child = oldParent.node();
628    m_taskQueue.append(task);
629}
630
631PassRefPtr<Element> HTMLConstructionSite::createElement(AtomicHTMLToken* token, const AtomicString& namespaceURI)
632{
633    QualifiedName tagName(nullAtom, token->name(), namespaceURI);
634    RefPtr<Element> element = ownerDocumentForCurrentNode()->createElement(tagName, true);
635    setAttributes(element.get(), token, m_parserContentPolicy);
636    return element.release();
637}
638
639inline Document* HTMLConstructionSite::ownerDocumentForCurrentNode()
640{
641#if ENABLE(TEMPLATE_ELEMENT)
642    if (currentNode()->hasTagName(templateTag))
643        return toHTMLTemplateElement(currentElement())->content()->document();
644#endif
645    return currentNode()->document();
646}
647
648PassRefPtr<Element> HTMLConstructionSite::createHTMLElement(AtomicHTMLToken* token)
649{
650    QualifiedName tagName(nullAtom, token->name(), xhtmlNamespaceURI);
651    // FIXME: This can't use HTMLConstructionSite::createElement because we
652    // have to pass the current form element.  We should rework form association
653    // to occur after construction to allow better code sharing here.
654    RefPtr<Element> element = HTMLElementFactory::createHTMLElement(tagName, ownerDocumentForCurrentNode(), form(), true);
655    setAttributes(element.get(), token, m_parserContentPolicy);
656    ASSERT(element->isHTMLElement());
657    return element.release();
658}
659
660PassRefPtr<HTMLStackItem> HTMLConstructionSite::createElementFromSavedToken(HTMLStackItem* item)
661{
662    RefPtr<Element> element;
663    // NOTE: Moving from item -> token -> item copies the Attribute vector twice!
664    AtomicHTMLToken fakeToken(HTMLToken::StartTag, item->localName(), item->attributes());
665    if (item->namespaceURI() == HTMLNames::xhtmlNamespaceURI)
666        element = createHTMLElement(&fakeToken);
667    else
668        element = createElement(&fakeToken, item->namespaceURI());
669    return HTMLStackItem::create(element.release(), &fakeToken, item->namespaceURI());
670}
671
672bool HTMLConstructionSite::indexOfFirstUnopenFormattingElement(unsigned& firstUnopenElementIndex) const
673{
674    if (m_activeFormattingElements.isEmpty())
675        return false;
676    unsigned index = m_activeFormattingElements.size();
677    do {
678        --index;
679        const HTMLFormattingElementList::Entry& entry = m_activeFormattingElements.at(index);
680        if (entry.isMarker() || m_openElements.contains(entry.element())) {
681            firstUnopenElementIndex = index + 1;
682            return firstUnopenElementIndex < m_activeFormattingElements.size();
683        }
684    } while (index);
685    firstUnopenElementIndex = index;
686    return true;
687}
688
689void HTMLConstructionSite::reconstructTheActiveFormattingElements()
690{
691    unsigned firstUnopenElementIndex;
692    if (!indexOfFirstUnopenFormattingElement(firstUnopenElementIndex))
693        return;
694
695    unsigned unopenEntryIndex = firstUnopenElementIndex;
696    ASSERT(unopenEntryIndex < m_activeFormattingElements.size());
697    for (; unopenEntryIndex < m_activeFormattingElements.size(); ++unopenEntryIndex) {
698        HTMLFormattingElementList::Entry& unopenedEntry = m_activeFormattingElements.at(unopenEntryIndex);
699        RefPtr<HTMLStackItem> reconstructed = createElementFromSavedToken(unopenedEntry.stackItem().get());
700        attachLater(currentNode(), reconstructed->node());
701        m_openElements.push(reconstructed);
702        unopenedEntry.replaceElement(reconstructed.release());
703    }
704}
705
706void HTMLConstructionSite::generateImpliedEndTagsWithExclusion(const AtomicString& tagName)
707{
708    while (hasImpliedEndTag(currentStackItem()) && !currentStackItem()->matchesHTMLTag(tagName))
709        m_openElements.pop();
710}
711
712void HTMLConstructionSite::generateImpliedEndTags()
713{
714    while (hasImpliedEndTag(currentStackItem()))
715        m_openElements.pop();
716}
717
718bool HTMLConstructionSite::inQuirksMode()
719{
720    return m_inQuirksMode;
721}
722
723void HTMLConstructionSite::findFosterSite(HTMLConstructionSiteTask& task)
724{
725#if ENABLE(TEMPLATE_ELEMENT)
726    // When a node is to be foster parented, the last template element with no table element is below it in the stack of open elements is the foster parent element (NOT the template's parent!)
727    HTMLElementStack::ElementRecord* lastTemplateElement = m_openElements.topmost(templateTag.localName());
728    if (lastTemplateElement && !m_openElements.inTableScope(tableTag)) {
729        task.parent = lastTemplateElement->element();
730        return;
731    }
732
733#endif
734
735    HTMLElementStack::ElementRecord* lastTableElementRecord = m_openElements.topmost(tableTag.localName());
736    if (lastTableElementRecord) {
737        Element* lastTableElement = lastTableElementRecord->element();
738        ContainerNode* parent = lastTableElement->parentNode();
739        // When parsing HTML fragments, we skip step 4.2 ("Let root be a new html element with no attributes") for efficiency,
740        // and instead use the DocumentFragment as a root node. So we must treat the root node (DocumentFragment) as if it is a html element here.
741        bool parentCanBeFosterParent = parent && (parent->isElementNode() || (m_isParsingFragment && parent == m_openElements.rootNode()));
742#if ENABLE(TEMPLATE_ELEMENT)
743        parentCanBeFosterParent = parentCanBeFosterParent || (parent && parent->isDocumentFragment() && static_cast<DocumentFragment*>(parent)->isTemplateContent());
744#endif
745        if (parentCanBeFosterParent) {
746            task.parent = parent;
747            task.nextChild = lastTableElement;
748            return;
749        }
750        task.parent = lastTableElementRecord->next()->element();
751        return;
752    }
753    // Fragment case
754    task.parent = m_openElements.rootNode(); // DocumentFragment
755}
756
757bool HTMLConstructionSite::shouldFosterParent() const
758{
759    return m_redirectAttachToFosterParent
760        && currentStackItem()->isElementNode()
761        && currentStackItem()->causesFosterParenting();
762}
763
764void HTMLConstructionSite::fosterParent(PassRefPtr<Node> node)
765{
766    HTMLConstructionSiteTask task(HTMLConstructionSiteTask::Insert);
767    findFosterSite(task);
768    task.child = node;
769    ASSERT(task.parent);
770
771    m_taskQueue.append(task);
772}
773
774}
775