1/*
2 * Copyright (C) 2012 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31#include "config.h"
32
33#if ENABLE(INSPECTOR)
34
35#include "DOMPatchSupport.h"
36
37#include "Attribute.h"
38#include "DOMEditor.h"
39#include "Document.h"
40#include "DocumentFragment.h"
41#include "HTMLDocument.h"
42#include "HTMLDocumentParser.h"
43#include "HTMLElement.h"
44#include "HTMLHeadElement.h"
45#include "HTMLNames.h"
46#include "InspectorHistory.h"
47#include "Node.h"
48#include "XMLDocumentParser.h"
49
50#include <wtf/Deque.h>
51#include <wtf/HashTraits.h>
52#include <wtf/RefPtr.h>
53#include <wtf/SHA1.h>
54#include <wtf/text/Base64.h>
55#include <wtf/text/CString.h>
56
57namespace WebCore {
58
59using HTMLNames::bodyTag;
60using HTMLNames::headTag;
61using HTMLNames::htmlTag;
62
63struct DOMPatchSupport::Digest {
64    explicit Digest(Node* node) : m_node(node) { }
65
66    String m_sha1;
67    String m_attrsSHA1;
68    Node* m_node;
69    Vector<std::unique_ptr<Digest>> m_children;
70};
71
72void DOMPatchSupport::patchDocument(Document* document, const String& markup)
73{
74    InspectorHistory history;
75    DOMEditor domEditor(&history);
76    DOMPatchSupport patchSupport(&domEditor, document);
77    patchSupport.patchDocument(markup);
78}
79
80DOMPatchSupport::DOMPatchSupport(DOMEditor* domEditor, Document* document)
81    : m_domEditor(domEditor)
82    , m_document(document)
83{
84}
85
86DOMPatchSupport::~DOMPatchSupport() { }
87
88void DOMPatchSupport::patchDocument(const String& markup)
89{
90    RefPtr<Document> newDocument;
91    if (m_document->isHTMLDocument())
92        newDocument = HTMLDocument::create(nullptr, URL());
93    else if (m_document->isXHTMLDocument())
94        newDocument = HTMLDocument::createXHTML(nullptr, URL());
95    else if (m_document->isSVGDocument())
96        newDocument = Document::create(nullptr, URL());
97
98    ASSERT(newDocument);
99    RefPtr<DocumentParser> parser;
100    if (newDocument->isHTMLDocument())
101        parser = HTMLDocumentParser::create(static_cast<HTMLDocument&>(*newDocument));
102    else
103        parser = XMLDocumentParser::create(*newDocument, nullptr);
104    parser->insert(markup); // Use insert() so that the parser will not yield.
105    parser->finish();
106    parser->detach();
107
108    std::unique_ptr<Digest> oldInfo = createDigest(m_document->documentElement(), nullptr);
109    std::unique_ptr<Digest> newInfo = createDigest(newDocument->documentElement(), &m_unusedNodesMap);
110
111    if (!innerPatchNode(oldInfo.get(), newInfo.get(), IGNORE_EXCEPTION)) {
112        // Fall back to rewrite.
113        m_document->write(markup);
114        m_document->close();
115    }
116}
117
118Node* DOMPatchSupport::patchNode(Node& node, const String& markup, ExceptionCode& ec)
119{
120    // Don't parse <html> as a fragment.
121    if (node.isDocumentNode() || (node.parentNode() && node.parentNode()->isDocumentNode())) {
122        patchDocument(markup);
123        return nullptr;
124    }
125
126    Node* previousSibling = node.previousSibling();
127    // FIXME: This code should use one of createFragment* in markup.h
128    RefPtr<DocumentFragment> fragment = DocumentFragment::create(*m_document);
129    if (m_document->isHTMLDocument())
130        fragment->parseHTML(markup, node.parentElement() ? node.parentElement() : m_document->documentElement());
131    else
132        fragment->parseXML(markup, node.parentElement() ? node.parentElement() : m_document->documentElement());
133
134    // Compose the old list.
135    ContainerNode* parentNode = node.parentNode();
136    Vector<std::unique_ptr<Digest>> oldList;
137    for (Node* child = parentNode->firstChild(); child; child = child->nextSibling())
138        oldList.append(createDigest(child, nullptr));
139
140    // Compose the new list.
141    String markupCopy = markup.lower();
142    Vector<std::unique_ptr<Digest>> newList;
143    for (Node* child = parentNode->firstChild(); child != &node; child = child->nextSibling())
144        newList.append(createDigest(child, nullptr));
145    for (Node* child = fragment->firstChild(); child; child = child->nextSibling()) {
146        if (child->hasTagName(headTag) && !child->firstChild() && markupCopy.find("</head>") == notFound)
147            continue; // HTML5 parser inserts empty <head> tag whenever it parses <body>
148        if (child->hasTagName(bodyTag) && !child->firstChild() && markupCopy.find("</body>") == notFound)
149            continue; // HTML5 parser inserts empty <body> tag whenever it parses </head>
150        newList.append(createDigest(child, &m_unusedNodesMap));
151    }
152    for (Node* child = node.nextSibling(); child; child = child->nextSibling())
153        newList.append(createDigest(child, nullptr));
154
155    if (!innerPatchChildren(parentNode, oldList, newList, ec)) {
156        // Fall back to total replace.
157        ec = 0;
158        if (!m_domEditor->replaceChild(parentNode, fragment.release(), &node, ec))
159            return nullptr;
160    }
161    return previousSibling ? previousSibling->nextSibling() : parentNode->firstChild();
162}
163
164bool DOMPatchSupport::innerPatchNode(Digest* oldDigest, Digest* newDigest, ExceptionCode& ec)
165{
166    if (oldDigest->m_sha1 == newDigest->m_sha1)
167        return true;
168
169    Node* oldNode = oldDigest->m_node;
170    Node* newNode = newDigest->m_node;
171
172    if (newNode->nodeType() != oldNode->nodeType() || newNode->nodeName() != oldNode->nodeName())
173        return m_domEditor->replaceChild(oldNode->parentNode(), newNode, oldNode, ec);
174
175    if (oldNode->nodeValue() != newNode->nodeValue()) {
176        if (!m_domEditor->setNodeValue(oldNode, newNode->nodeValue(), ec))
177            return false;
178    }
179
180    if (oldNode->nodeType() != Node::ELEMENT_NODE)
181        return true;
182
183    // Patch attributes
184    Element* oldElement = toElement(oldNode);
185    Element* newElement = toElement(newNode);
186    if (oldDigest->m_attrsSHA1 != newDigest->m_attrsSHA1) {
187        // FIXME: Create a function in Element for removing all properties. Take in account whether did/willModifyAttribute are important.
188        if (oldElement->hasAttributesWithoutUpdate()) {
189            while (oldElement->attributeCount()) {
190                const Attribute& attribute = oldElement->attributeAt(0);
191                if (!m_domEditor->removeAttribute(oldElement, attribute.localName(), ec))
192                    return false;
193            }
194        }
195
196        // FIXME: Create a function in Element for copying properties. cloneDataFromElement() is close but not enough for this case.
197        if (newElement->hasAttributesWithoutUpdate()) {
198            for (const Attribute& attribute : newElement->attributesIterator()) {
199                if (!m_domEditor->setAttribute(oldElement, attribute.name().localName(), attribute.value(), ec))
200                    return false;
201            }
202        }
203    }
204
205    bool result = innerPatchChildren(oldElement, oldDigest->m_children, newDigest->m_children, ec);
206    m_unusedNodesMap.remove(newDigest->m_sha1);
207    return result;
208}
209
210std::pair<DOMPatchSupport::ResultMap, DOMPatchSupport::ResultMap>
211DOMPatchSupport::diff(const Vector<std::unique_ptr<Digest>>& oldList, const Vector<std::unique_ptr<Digest>>& newList)
212{
213    ResultMap newMap(newList.size());
214    ResultMap oldMap(oldList.size());
215
216    for (size_t i = 0; i < oldMap.size(); ++i) {
217        oldMap[i].first = nullptr;
218        oldMap[i].second = 0;
219    }
220
221    for (size_t i = 0; i < newMap.size(); ++i) {
222        newMap[i].first = nullptr;
223        newMap[i].second = 0;
224    }
225
226    // Trim head and tail.
227    for (size_t i = 0; i < oldList.size() && i < newList.size() && oldList[i]->m_sha1 == newList[i]->m_sha1; ++i) {
228        oldMap[i].first = oldList[i].get();
229        oldMap[i].second = i;
230        newMap[i].first = newList[i].get();
231        newMap[i].second = i;
232    }
233    for (size_t i = 0; i < oldList.size() && i < newList.size() && oldList[oldList.size() - i - 1]->m_sha1 == newList[newList.size() - i - 1]->m_sha1; ++i) {
234        size_t oldIndex = oldList.size() - i - 1;
235        size_t newIndex = newList.size() - i - 1;
236        oldMap[oldIndex].first = oldList[oldIndex].get();
237        oldMap[oldIndex].second = newIndex;
238        newMap[newIndex].first = newList[newIndex].get();
239        newMap[newIndex].second = oldIndex;
240    }
241
242    typedef HashMap<String, Vector<size_t>> DiffTable;
243    DiffTable newTable;
244    DiffTable oldTable;
245
246    for (size_t i = 0; i < newList.size(); ++i) {
247        DiffTable::iterator it = newTable.add(newList[i]->m_sha1, Vector<size_t>()).iterator;
248        it->value.append(i);
249    }
250
251    for (size_t i = 0; i < oldList.size(); ++i) {
252        DiffTable::iterator it = oldTable.add(oldList[i]->m_sha1, Vector<size_t>()).iterator;
253        it->value.append(i);
254    }
255
256    for (DiffTable::iterator newIt = newTable.begin(); newIt != newTable.end(); ++newIt) {
257        if (newIt->value.size() != 1)
258            continue;
259
260        DiffTable::iterator oldIt = oldTable.find(newIt->key);
261        if (oldIt == oldTable.end() || oldIt->value.size() != 1)
262            continue;
263
264        newMap[newIt->value[0]] = std::make_pair(newList[newIt->value[0]].get(), oldIt->value[0]);
265        oldMap[oldIt->value[0]] = std::make_pair(oldList[oldIt->value[0]].get(), newIt->value[0]);
266    }
267
268    for (size_t i = 0; newList.size() > 0 && i < newList.size() - 1; ++i) {
269        if (!newMap[i].first || newMap[i + 1].first)
270            continue;
271
272        size_t j = newMap[i].second + 1;
273        if (j < oldMap.size() && !oldMap[j].first && newList[i + 1]->m_sha1 == oldList[j]->m_sha1) {
274            newMap[i + 1] = std::make_pair(newList[i + 1].get(), j);
275            oldMap[j] = std::make_pair(oldList[j].get(), i + 1);
276        }
277    }
278
279    for (size_t i = newList.size() - 1; newList.size() > 0 && i > 0; --i) {
280        if (!newMap[i].first || newMap[i - 1].first || newMap[i].second <= 0)
281            continue;
282
283        size_t j = newMap[i].second - 1;
284        if (!oldMap[j].first && newList[i - 1]->m_sha1 == oldList[j]->m_sha1) {
285            newMap[i - 1] = std::make_pair(newList[i - 1].get(), j);
286            oldMap[j] = std::make_pair(oldList[j].get(), i - 1);
287        }
288    }
289
290#ifdef DEBUG_DOM_PATCH_SUPPORT
291    dumpMap(oldMap, "OLD");
292    dumpMap(newMap, "NEW");
293#endif
294
295    return std::make_pair(oldMap, newMap);
296}
297
298bool DOMPatchSupport::innerPatchChildren(ContainerNode* parentNode, const Vector<std::unique_ptr<Digest>>& oldList, const Vector<std::unique_ptr<Digest>>& newList, ExceptionCode& ec)
299{
300    std::pair<ResultMap, ResultMap> resultMaps = diff(oldList, newList);
301    ResultMap& oldMap = resultMaps.first;
302    ResultMap& newMap = resultMaps.second;
303
304    Digest* oldHead = nullptr;
305    Digest* oldBody = nullptr;
306
307    // 1. First strip everything except for the nodes that retain. Collect pending merges.
308    HashMap<Digest*, Digest*> merges;
309    HashSet<size_t, WTF::IntHash<size_t>, WTF::UnsignedWithZeroKeyHashTraits<size_t>> usedNewOrdinals;
310    for (size_t i = 0; i < oldList.size(); ++i) {
311        if (oldMap[i].first) {
312            if (!usedNewOrdinals.contains(oldMap[i].second)) {
313                usedNewOrdinals.add(oldMap[i].second);
314                continue;
315            }
316            oldMap[i].first = nullptr;
317            oldMap[i].second = 0;
318        }
319
320        // Always match <head> and <body> tags with each other - we can't remove them from the DOM
321        // upon patching.
322        if (oldList[i]->m_node->hasTagName(headTag)) {
323            oldHead = oldList[i].get();
324            continue;
325        }
326        if (oldList[i]->m_node->hasTagName(bodyTag)) {
327            oldBody = oldList[i].get();
328            continue;
329        }
330
331        // Check if this change is between stable nodes. If it is, consider it as "modified".
332        if (!m_unusedNodesMap.contains(oldList[i]->m_sha1) && (!i || oldMap[i - 1].first) && (i == oldMap.size() - 1 || oldMap[i + 1].first)) {
333            size_t anchorCandidate = i ? oldMap[i - 1].second + 1 : 0;
334            size_t anchorAfter = i == oldMap.size() - 1 ? anchorCandidate + 1 : oldMap[i + 1].second;
335            if (anchorAfter - anchorCandidate == 1 && anchorCandidate < newList.size())
336                merges.set(newList[anchorCandidate].get(), oldList[i].get());
337            else {
338                if (!removeChildAndMoveToNew(oldList[i].get(), ec))
339                    return false;
340            }
341        } else {
342            if (!removeChildAndMoveToNew(oldList[i].get(), ec))
343                return false;
344        }
345    }
346
347    // Mark retained nodes as used, do not reuse node more than once.
348    HashSet<size_t, WTF::IntHash<size_t>, WTF::UnsignedWithZeroKeyHashTraits<size_t>>  usedOldOrdinals;
349    for (size_t i = 0; i < newList.size(); ++i) {
350        if (!newMap[i].first)
351            continue;
352        size_t oldOrdinal = newMap[i].second;
353        if (usedOldOrdinals.contains(oldOrdinal)) {
354            // Do not map node more than once
355            newMap[i].first = nullptr;
356            newMap[i].second = 0;
357            continue;
358        }
359        usedOldOrdinals.add(oldOrdinal);
360        markNodeAsUsed(newMap[i].first);
361    }
362
363    // Mark <head> and <body> nodes for merge.
364    if (oldHead || oldBody) {
365        for (size_t i = 0; i < newList.size(); ++i) {
366            if (oldHead && newList[i]->m_node->hasTagName(headTag))
367                merges.set(newList[i].get(), oldHead);
368            if (oldBody && newList[i]->m_node->hasTagName(bodyTag))
369                merges.set(newList[i].get(), oldBody);
370        }
371    }
372
373    // 2. Patch nodes marked for merge.
374    for (HashMap<Digest*, Digest*>::iterator it = merges.begin(); it != merges.end(); ++it) {
375        if (!innerPatchNode(it->value, it->key, ec))
376            return false;
377    }
378
379    // 3. Insert missing nodes.
380    for (size_t i = 0; i < newMap.size(); ++i) {
381        if (newMap[i].first || merges.contains(newList[i].get()))
382            continue;
383        if (!insertBeforeAndMarkAsUsed(parentNode, newList[i].get(), parentNode->childNode(i), ec))
384            return false;
385    }
386
387    // 4. Then put all nodes that retained into their slots (sort by new index).
388    for (size_t i = 0; i < oldMap.size(); ++i) {
389        if (!oldMap[i].first)
390            continue;
391        RefPtr<Node> node = oldMap[i].first->m_node;
392        Node* anchorNode = parentNode->childNode(oldMap[i].second);
393        if (node.get() == anchorNode)
394            continue;
395        if (node->hasTagName(bodyTag) || node->hasTagName(headTag))
396            continue; // Never move head or body, move the rest of the nodes around them.
397
398        if (!m_domEditor->insertBefore(parentNode, node.release(), anchorNode, ec))
399            return false;
400    }
401    return true;
402}
403
404static void addStringToSHA1(SHA1& sha1, const String& string)
405{
406    CString cString = string.utf8();
407    sha1.addBytes(reinterpret_cast<const uint8_t*>(cString.data()), cString.length());
408}
409
410std::unique_ptr<DOMPatchSupport::Digest> DOMPatchSupport::createDigest(Node* node, UnusedNodesMap* unusedNodesMap)
411{
412    auto digest = std::make_unique<Digest>(node);
413    SHA1 sha1;
414
415    Node::NodeType nodeType = node->nodeType();
416    sha1.addBytes(reinterpret_cast<const uint8_t*>(&nodeType), sizeof(nodeType));
417    addStringToSHA1(sha1, node->nodeName());
418    addStringToSHA1(sha1, node->nodeValue());
419
420    if (node->nodeType() == Node::ELEMENT_NODE) {
421        Node* child = node->firstChild();
422        while (child) {
423            std::unique_ptr<Digest> childInfo = createDigest(child, unusedNodesMap);
424            addStringToSHA1(sha1, childInfo->m_sha1);
425            child = child->nextSibling();
426            digest->m_children.append(WTF::move(childInfo));
427        }
428        Element* element = toElement(node);
429
430        if (element->hasAttributesWithoutUpdate()) {
431            SHA1 attrsSHA1;
432            for (const Attribute& attribute : element->attributesIterator()) {
433                addStringToSHA1(attrsSHA1, attribute.name().toString());
434                addStringToSHA1(attrsSHA1, attribute.value());
435            }
436            SHA1::Digest attrsHash;
437            attrsSHA1.computeHash(attrsHash);
438            digest->m_attrsSHA1 = base64Encode(attrsHash.data(), 10);
439            addStringToSHA1(sha1, digest->m_attrsSHA1);
440        }
441    }
442
443    SHA1::Digest hash;
444    sha1.computeHash(hash);
445    digest->m_sha1 = base64Encode(hash.data(), 10);
446    if (unusedNodesMap)
447        unusedNodesMap->add(digest->m_sha1, digest.get());
448
449    return digest;
450}
451
452bool DOMPatchSupport::insertBeforeAndMarkAsUsed(ContainerNode* parentNode, Digest* digest, Node* anchor, ExceptionCode& ec)
453{
454    bool result = m_domEditor->insertBefore(parentNode, digest->m_node, anchor, ec);
455    markNodeAsUsed(digest);
456    return result;
457}
458
459bool DOMPatchSupport::removeChildAndMoveToNew(Digest* oldDigest, ExceptionCode& ec)
460{
461    RefPtr<Node> oldNode = oldDigest->m_node;
462    if (!m_domEditor->removeChild(oldNode->parentNode(), oldNode.get(), ec))
463        return false;
464
465    // Diff works within levels. In order not to lose the node identity when user
466    // prepends his HTML with "<div>" (i.e. all nodes are shifted to the next nested level),
467    // prior to dropping the original node on the floor, check whether new DOM has a digest
468    // with matching sha1. If it does, replace it with the original DOM chunk. Chances are
469    // high that it will get merged back into the original DOM during the further patching.
470    UnusedNodesMap::iterator it = m_unusedNodesMap.find(oldDigest->m_sha1);
471    if (it != m_unusedNodesMap.end()) {
472        Digest* newDigest = it->value;
473        Node* newNode = newDigest->m_node;
474        if (!m_domEditor->replaceChild(newNode->parentNode(), oldNode, newNode, ec))
475            return false;
476        newDigest->m_node = oldNode.get();
477        markNodeAsUsed(newDigest);
478        return true;
479    }
480
481    for (size_t i = 0; i < oldDigest->m_children.size(); ++i) {
482        if (!removeChildAndMoveToNew(oldDigest->m_children[i].get(), ec))
483            return false;
484    }
485    return true;
486}
487
488void DOMPatchSupport::markNodeAsUsed(Digest* digest)
489{
490    Deque<Digest*> queue;
491    queue.append(digest);
492    while (!queue.isEmpty()) {
493        Digest* first = queue.takeFirst();
494        m_unusedNodesMap.remove(first->m_sha1);
495        for (size_t i = 0; i < first->m_children.size(); ++i)
496            queue.append(first->m_children[i].get());
497    }
498}
499
500#ifdef DEBUG_DOM_PATCH_SUPPORT
501static String nodeName(Node* node)
502{
503    if (node->document().isXHTMLDocument())
504         return node->nodeName();
505    return node->nodeName().lower();
506}
507
508void DOMPatchSupport::dumpMap(const ResultMap& map, const String& name)
509{
510    fprintf(stderr, "\n\n");
511    for (size_t i = 0; i < map.size(); ++i)
512        fprintf(stderr, "%s[%lu]: %s (%p) - [%lu]\n", name.utf8().data(), i, map[i].first ? nodeName(map[i].first->m_node).utf8().data() : "", map[i].first, map[i].second);
513}
514#endif
515
516} // namespace WebCore
517
518#endif // ENABLE(INSPECTOR)
519