1/*
2 * Copyright (C) 2008, 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 *
8 * 1.  Redistributions of source code must retain the above copyright
9 *     notice, this list of conditions and the following disclaimer.
10 * 2.  Redistributions in binary form must reproduce the above copyright
11 *     notice, this list of conditions and the following disclaimer in the
12 *     documentation and/or other materials provided with the distribution.
13 * 3.  Neither the name of Apple Inc. ("Apple") nor the names of
14 *     its contributors may be used to endorse or promote products derived
15 *     from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include "config.h"
30#include "LegacyWebArchive.h"
31
32#include "CachedResource.h"
33#include "Document.h"
34#include "DocumentLoader.h"
35#include "Frame.h"
36#include "FrameLoader.h"
37#include "FrameSelection.h"
38#include "FrameTree.h"
39#include "HTMLFrameOwnerElement.h"
40#include "HTMLNames.h"
41#include "IconDatabase.h"
42#include "Image.h"
43#include "URLHash.h"
44#include "Logging.h"
45#include "MemoryCache.h"
46#include "Page.h"
47#include "Range.h"
48#include "ResourceBuffer.h"
49#include "Settings.h"
50#include "markup.h"
51#include <wtf/ListHashSet.h>
52#include <wtf/RetainPtr.h>
53#include <wtf/text/StringBuilder.h>
54#include <wtf/text/CString.h>
55
56namespace WebCore {
57
// Keys used in the property list form of a legacy web archive.

// Keys stored on the top-level archive dictionary.
static const CFStringRef LegacyWebArchiveMainResourceKey = CFSTR("WebMainResource");
static const CFStringRef LegacyWebArchiveSubresourcesKey = CFSTR("WebSubresources");
static const CFStringRef LegacyWebArchiveSubframeArchivesKey = CFSTR("WebSubframeArchives");
// Keys stored on each individual resource dictionary.
static const CFStringRef LegacyWebArchiveResourceDataKey = CFSTR("WebResourceData");
static const CFStringRef LegacyWebArchiveResourceFrameNameKey = CFSTR("WebResourceFrameName");
static const CFStringRef LegacyWebArchiveResourceMIMETypeKey = CFSTR("WebResourceMIMEType");
static const CFStringRef LegacyWebArchiveResourceURLKey = CFSTR("WebResourceURL");
static const CFStringRef LegacyWebArchiveResourceTextEncodingNameKey = CFSTR("WebResourceTextEncodingName");
static const CFStringRef LegacyWebArchiveResourceResponseKey = CFSTR("WebResourceResponse");
// Only read (never written) in this file; its presence selects the "new" response format when parsing.
static const CFStringRef LegacyWebArchiveResourceResponseVersionKey = CFSTR("WebResourceResponseVersion");
68
69RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(ArchiveResource* resource, MainResourceStatus isMainResource)
70{
71    if (!resource) {
72        // The property list representation of a null/empty WebResource has the following 3 objects stored as nil.
73        // FIXME: 0 is not serializable. Presumably we need to use kCFNull here instead for compatibility.
74        // FIXME: But why do we need to support a resource of 0? Who relies on that?
75        RetainPtr<CFMutableDictionaryRef> propertyList = adoptCF(CFDictionaryCreateMutable(0, 3, 0, 0));
76        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, 0);
77        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, 0);
78        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, 0);
79        return propertyList;
80    }
81
82    RetainPtr<CFMutableDictionaryRef> propertyList = adoptCF(CFDictionaryCreateMutable(0, 6, 0, &kCFTypeDictionaryValueCallBacks));
83
84    // Resource data can be empty, but must be represented by an empty CFDataRef
85    SharedBuffer* data = resource->data();
86    RetainPtr<CFDataRef> cfData;
87    if (data)
88        cfData = data->createCFData();
89    else
90        cfData = adoptCF(CFDataCreate(0, 0, 0));
91    CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, cfData.get());
92
93    // Resource URL cannot be null
94    if (RetainPtr<CFStringRef> cfURL = resource->url().string().createCFString())
95        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, cfURL.get());
96    else {
97        LOG(Archives, "LegacyWebArchive - NULL resource URL is invalid - returning null property list");
98        return 0;
99    }
100
101    // FrameName should be left out if empty for subresources, but always included for main resources
102    const String& frameName(resource->frameName());
103    if (!frameName.isEmpty() || isMainResource)
104        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceFrameNameKey, frameName.createCFString().get());
105
106    // Set MIMEType, TextEncodingName, and ResourceResponse only if they actually exist
107    const String& mimeType(resource->mimeType());
108    if (!mimeType.isEmpty())
109        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, mimeType.createCFString().get());
110
111    const String& textEncoding(resource->textEncoding());
112    if (!textEncoding.isEmpty())
113        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceTextEncodingNameKey, textEncoding.createCFString().get());
114
115    // Don't include the resource response for the main resource
116    if (!isMainResource) {
117        RetainPtr<CFDataRef> resourceResponseData = createPropertyListRepresentation(resource->response());
118        if (resourceResponseData)
119            CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceResponseKey, resourceResponseData.get());
120    }
121
122    return propertyList;
123}
124
125RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(Archive* archive)
126{
127    RetainPtr<CFMutableDictionaryRef> propertyList = adoptCF(CFDictionaryCreateMutable(0, 3, 0, &kCFTypeDictionaryValueCallBacks));
128
129    RetainPtr<CFDictionaryRef> mainResourceDict = createPropertyListRepresentation(archive->mainResource(), MainResource);
130    ASSERT(mainResourceDict);
131    if (!mainResourceDict)
132        return 0;
133    CFDictionarySetValue(propertyList.get(), LegacyWebArchiveMainResourceKey, mainResourceDict.get());
134
135    RetainPtr<CFMutableArrayRef> subresourcesArray = adoptCF(CFArrayCreateMutable(0, archive->subresources().size(), &kCFTypeArrayCallBacks));
136    const Vector<RefPtr<ArchiveResource>>& subresources(archive->subresources());
137    for (unsigned i = 0; i < subresources.size(); ++i) {
138        RetainPtr<CFDictionaryRef> subresource = createPropertyListRepresentation(subresources[i].get(), Subresource);
139        if (subresource)
140            CFArrayAppendValue(subresourcesArray.get(), subresource.get());
141        else
142            LOG(Archives, "LegacyWebArchive - Failed to create property list for subresource");
143    }
144    if (CFArrayGetCount(subresourcesArray.get()))
145        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubresourcesKey, subresourcesArray.get());
146
147    RetainPtr<CFMutableArrayRef> subframesArray = adoptCF(CFArrayCreateMutable(0, archive->subframeArchives().size(), &kCFTypeArrayCallBacks));
148    const Vector<RefPtr<Archive>>& subframeArchives(archive->subframeArchives());
149    for (unsigned i = 0; i < subframeArchives.size(); ++i) {
150        RetainPtr<CFDictionaryRef> subframeArchive = createPropertyListRepresentation(subframeArchives[i].get());
151        if (subframeArchive)
152            CFArrayAppendValue(subframesArray.get(), subframeArchive.get());
153        else
154            LOG(Archives, "LegacyWebArchive - Failed to create property list for subframe archive");
155    }
156    if (CFArrayGetCount(subframesArray.get()))
157        CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubframeArchivesKey, subframesArray.get());
158
159    return propertyList;
160}
161
162ResourceResponse LegacyWebArchive::createResourceResponseFromPropertyListData(CFDataRef data, CFStringRef responseDataType)
163{
164    ASSERT(data);
165    if (!data)
166        return ResourceResponse();
167
168    // If the ResourceResponseVersion (passed in as responseDataType) exists at all, this is a "new" web archive that we
169    // can parse well in a cross platform manner If it doesn't exist, we will assume this is an "old" web archive with,
170    // NSURLResponse objects in it and parse the ResourceResponse as such.
171    if (!responseDataType)
172        return createResourceResponseFromMacArchivedData(data);
173
174    // FIXME: Parse the "new" format that the above comment references here. This format doesn't exist yet.
175    return ResourceResponse();
176}
177
178PassRefPtr<ArchiveResource> LegacyWebArchive::createResource(CFDictionaryRef dictionary)
179{
180    ASSERT(dictionary);
181    if (!dictionary)
182        return 0;
183
184    CFDataRef resourceData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceDataKey));
185    if (resourceData && CFGetTypeID(resourceData) != CFDataGetTypeID()) {
186        LOG(Archives, "LegacyWebArchive - Resource data is not of type CFData, cannot create invalid resource");
187        return 0;
188    }
189
190    CFStringRef frameName = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceFrameNameKey));
191    if (frameName && CFGetTypeID(frameName) != CFStringGetTypeID()) {
192        LOG(Archives, "LegacyWebArchive - Frame name is not of type CFString, cannot create invalid resource");
193        return 0;
194    }
195
196    CFStringRef mimeType = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceMIMETypeKey));
197    if (mimeType && CFGetTypeID(mimeType) != CFStringGetTypeID()) {
198        LOG(Archives, "LegacyWebArchive - MIME type is not of type CFString, cannot create invalid resource");
199        return 0;
200    }
201
202    CFStringRef url = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceURLKey));
203    if (url && CFGetTypeID(url) != CFStringGetTypeID()) {
204        LOG(Archives, "LegacyWebArchive - URL is not of type CFString, cannot create invalid resource");
205        return 0;
206    }
207
208    CFStringRef textEncoding = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceTextEncodingNameKey));
209    if (textEncoding && CFGetTypeID(textEncoding) != CFStringGetTypeID()) {
210        LOG(Archives, "LegacyWebArchive - Text encoding is not of type CFString, cannot create invalid resource");
211        return 0;
212    }
213
214    ResourceResponse response;
215
216    CFDataRef resourceResponseData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseKey));
217    if (resourceResponseData) {
218        if (CFGetTypeID(resourceResponseData) != CFDataGetTypeID()) {
219            LOG(Archives, "LegacyWebArchive - Resource response data is not of type CFData, cannot create invalid resource");
220            return 0;
221        }
222
223        CFStringRef resourceResponseVersion = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseVersionKey));
224        if (resourceResponseVersion && CFGetTypeID(resourceResponseVersion) != CFStringGetTypeID()) {
225            LOG(Archives, "LegacyWebArchive - Resource response version is not of type CFString, cannot create invalid resource");
226            return 0;
227        }
228
229        response = createResourceResponseFromPropertyListData(resourceResponseData, resourceResponseVersion);
230    }
231
232    return ArchiveResource::create(SharedBuffer::wrapCFData(resourceData), URL(URL(), url), mimeType, textEncoding, frameName, response);
233}
234
// Creates a default-constructed LegacyWebArchive and adopts it into a ref-counted pointer.
// The other create() overloads in this file start from this and then populate the archive.
PassRefPtr<LegacyWebArchive> LegacyWebArchive::create()
{
    return adoptRef(new LegacyWebArchive);
}
239
240PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(PassRefPtr<ArchiveResource> mainResource, Vector<RefPtr<ArchiveResource>> subresources, Vector<RefPtr<LegacyWebArchive>> subframeArchives)
241{
242    ASSERT(mainResource);
243    if (!mainResource)
244        return 0;
245
246    RefPtr<LegacyWebArchive> archive = create();
247    archive->setMainResource(mainResource);
248
249    for (unsigned i = 0; i < subresources.size(); ++i)
250        archive->addSubresource(WTF::move(subresources[i]));
251
252    for (unsigned i = 0; i < subframeArchives.size(); ++i)
253        archive->addSubframeArchive(WTF::move(subframeArchives[i]));
254
255    return archive.release();
256}
257
// Convenience overload: parses an archive from raw data by forwarding to the
// (URL, SharedBuffer*) overload with a default-constructed URL.
PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(SharedBuffer* data)
{
    return create(URL(), data);
}
262
263PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const URL&, SharedBuffer* data)
264{
265    LOG(Archives, "LegacyWebArchive - Creating from raw data");
266
267    RefPtr<LegacyWebArchive> archive = create();
268
269    ASSERT(data);
270    if (!data)
271        return 0;
272
273    RetainPtr<CFDataRef> cfData = data->createCFData();
274    if (!cfData)
275        return 0;
276
277    CFErrorRef error = 0;
278
279    RetainPtr<CFDictionaryRef> plist = adoptCF(static_cast<CFDictionaryRef>(CFPropertyListCreateWithData(0, cfData.get(), kCFPropertyListImmutable, 0, &error)));
280    if (!plist) {
281#ifndef NDEBUG
282        RetainPtr<CFStringRef> errorString = error ? adoptCF(CFErrorCopyDescription(error)) : 0;
283        const char* cError = errorString ? CFStringGetCStringPtr(errorString.get(), kCFStringEncodingUTF8) : "unknown error";
284        LOG(Archives, "LegacyWebArchive - Error parsing PropertyList from archive data - %s", cError);
285#endif
286        if (error)
287            CFRelease(error);
288        return 0;
289    }
290
291    if (CFGetTypeID(plist.get()) != CFDictionaryGetTypeID()) {
292        LOG(Archives, "LegacyWebArchive - Archive property list is not the expected CFDictionary, aborting invalid WebArchive");
293        return 0;
294    }
295
296    if (!archive->extract(plist.get()))
297        return 0;
298
299    return archive.release();
300}
301
302bool LegacyWebArchive::extract(CFDictionaryRef dictionary)
303{
304    ASSERT(dictionary);
305    if (!dictionary) {
306        LOG(Archives, "LegacyWebArchive - Null root CFDictionary, aborting invalid WebArchive");
307        return false;
308    }
309
310    CFDictionaryRef mainResourceDict = static_cast<CFDictionaryRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveMainResourceKey));
311    if (!mainResourceDict) {
312        LOG(Archives, "LegacyWebArchive - No main resource in archive, aborting invalid WebArchive");
313        return false;
314    }
315    if (CFGetTypeID(mainResourceDict) != CFDictionaryGetTypeID()) {
316        LOG(Archives, "LegacyWebArchive - Main resource is not the expected CFDictionary, aborting invalid WebArchive");
317        return false;
318    }
319
320    setMainResource(createResource(mainResourceDict));
321    if (!mainResource()) {
322        LOG(Archives, "LegacyWebArchive - Failed to parse main resource from CFDictionary or main resource does not exist, aborting invalid WebArchive");
323        return false;
324    }
325
326    if (mainResource()->mimeType().isNull()) {
327        LOG(Archives, "LegacyWebArchive - Main resource MIME type is required, but was null.");
328        return false;
329    }
330
331    CFArrayRef subresourceArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubresourcesKey));
332    if (subresourceArray && CFGetTypeID(subresourceArray) != CFArrayGetTypeID()) {
333        LOG(Archives, "LegacyWebArchive - Subresources is not the expected Array, aborting invalid WebArchive");
334        return false;
335    }
336
337    if (subresourceArray) {
338        CFIndex count = CFArrayGetCount(subresourceArray);
339        for (CFIndex i = 0; i < count; ++i) {
340            CFDictionaryRef subresourceDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subresourceArray, i));
341            if (CFGetTypeID(subresourceDict) != CFDictionaryGetTypeID()) {
342                LOG(Archives, "LegacyWebArchive - Subresource is not expected CFDictionary, aborting invalid WebArchive");
343                return false;
344            }
345
346            if (RefPtr<ArchiveResource> subresource = createResource(subresourceDict))
347                addSubresource(subresource.release());
348        }
349    }
350
351    CFArrayRef subframeArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubframeArchivesKey));
352    if (subframeArray && CFGetTypeID(subframeArray) != CFArrayGetTypeID()) {
353        LOG(Archives, "LegacyWebArchive - Subframe archives is not the expected Array, aborting invalid WebArchive");
354        return false;
355    }
356
357    if (subframeArray) {
358        CFIndex count = CFArrayGetCount(subframeArray);
359        for (CFIndex i = 0; i < count; ++i) {
360            CFDictionaryRef subframeDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subframeArray, i));
361            if (CFGetTypeID(subframeDict) != CFDictionaryGetTypeID()) {
362                LOG(Archives, "LegacyWebArchive - Subframe array is not expected CFDictionary, aborting invalid WebArchive");
363                return false;
364            }
365
366            RefPtr<LegacyWebArchive> subframeArchive = create();
367            if (subframeArchive->extract(subframeDict))
368                addSubframeArchive(subframeArchive.release());
369            else
370                LOG(Archives, "LegacyWebArchive - Invalid subframe archive skipped");
371        }
372    }
373
374    return true;
375}
376
// Reports this archive's kind; a LegacyWebArchive is always Archive::WebArchive.
Archive::Type LegacyWebArchive::type() const
{
    return Archive::WebArchive;
}
381
382RetainPtr<CFDataRef> LegacyWebArchive::rawDataRepresentation()
383{
384    RetainPtr<CFDictionaryRef> propertyList = createPropertyListRepresentation(this);
385    ASSERT(propertyList);
386    if (!propertyList) {
387        LOG(Archives, "LegacyWebArchive - Failed to create property list for archive, returning no data");
388        return 0;
389    }
390
391    RetainPtr<CFWriteStreamRef> stream = adoptCF(CFWriteStreamCreateWithAllocatedBuffers(0, 0));
392
393    CFWriteStreamOpen(stream.get());
394    CFPropertyListWrite(propertyList.get(), stream.get(), kCFPropertyListBinaryFormat_v1_0, 0, 0);
395
396    RetainPtr<CFDataRef> plistData = adoptCF(static_cast<CFDataRef>(CFWriteStreamCopyProperty(stream.get(), kCFStreamPropertyDataWritten)));
397    ASSERT(plistData);
398
399    CFWriteStreamClose(stream.get());
400
401    if (!plistData) {
402        LOG(Archives, "LegacyWebArchive - Failed to convert property list into raw data, returning no data");
403        return 0;
404    }
405
406    return plistData;
407}
408
409#if !PLATFORM(COCOA)
410
// Non-Cocoa stub (compiled only when !PLATFORM(COCOA)): the archived response data is
// ignored and a default-constructed ResourceResponse is returned.
ResourceResponse LegacyWebArchive::createResourceResponseFromMacArchivedData(CFDataRef responseData)
{
    // FIXME: If it is possible to parse a serialized NSURLResponse manually, without using
    // NSKeyedUnarchiver, by manipulating plists directly, then we want to do that here.
    // Until then, this can be done on Mac only.
    return ResourceResponse();
}
418
// Non-Cocoa stub (compiled only when !PLATFORM(COCOA)): no serialized form of a response
// exists here, so this always returns null and callers omit the response entry.
RetainPtr<CFDataRef> LegacyWebArchive::createPropertyListRepresentation(const ResourceResponse& response)
{
    // FIXME: Write out the "new" format described in createResourceResponseFromPropertyListData once we invent it.
    return 0;
}
424
425#endif
426
427PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Node* node, std::function<bool (Frame&)> frameFilter)
428{
429    ASSERT(node);
430    if (!node)
431        return create();
432
433    Frame* frame = node->document().frame();
434    if (!frame)
435        return create();
436
437    // If the page was loaded with javascript enabled, we don't want to archive <noscript> tags
438    // In practice we don't actually know whether scripting was enabled when the page was originally loaded
439    // but we can approximate that by checking if scripting is enabled right now.
440    OwnPtr<Vector<QualifiedName>> tagNamesToFilter;
441    if (frame->page() && frame->page()->settings().isScriptEnabled()) {
442        tagNamesToFilter = adoptPtr(new Vector<QualifiedName>);
443        tagNamesToFilter->append(HTMLNames::noscriptTag);
444    }
445
446    Vector<Node*> nodeList;
447    String markupString = createMarkup(*node, IncludeNode, &nodeList, DoNotResolveURLs, tagNamesToFilter.get());
448    Node::NodeType nodeType = node->nodeType();
449    if (nodeType != Node::DOCUMENT_NODE && nodeType != Node::DOCUMENT_TYPE_NODE)
450        markupString = documentTypeString(node->document()) + markupString;
451
452    return create(markupString, frame, nodeList, WTF::move(frameFilter));
453}
454
455PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Frame* frame)
456{
457    ASSERT(frame);
458
459    DocumentLoader* documentLoader = frame->loader().documentLoader();
460
461    if (!documentLoader)
462        return 0;
463
464    Vector<RefPtr<LegacyWebArchive>> subframeArchives;
465
466    for (unsigned i = 0; i < frame->tree().childCount(); ++i) {
467        if (RefPtr<LegacyWebArchive> childFrameArchive = create(frame->tree().child(i)))
468            subframeArchives.append(WTF::move(childFrameArchive));
469    }
470
471    auto subresources = documentLoader->subresources();
472    return create(documentLoader->mainResource(), WTF::move(subresources), WTF::move(subframeArchives));
473}
474
475PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Range* range)
476{
477    if (!range)
478        return nullptr;
479
480    Node* startContainer = range->startContainer();
481    if (!startContainer)
482        return nullptr;
483
484    Document& document = startContainer->document();
485
486    Frame* frame = document.frame();
487    if (!frame)
488        return nullptr;
489
490    // FIXME: This is always "for interchange". Is that right? See the previous method.
491    Vector<Node*> nodeList;
492    String markupString = documentTypeString(document) + createMarkup(*range, &nodeList, AnnotateForInterchange);
493
494    return create(markupString, frame, nodeList, nullptr);
495}
496
497PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Frame* frame, const Vector<Node*>& nodes, std::function<bool (Frame&)> frameFilter)
498{
499    ASSERT(frame);
500
501    const ResourceResponse& response = frame->loader().documentLoader()->response();
502    URL responseURL = response.url();
503
504    // it's possible to have a response without a URL here
505    // <rdar://problem/5454935>
506    if (responseURL.isNull())
507        responseURL = URL(ParsedURLString, emptyString());
508
509    RefPtr<ArchiveResource> mainResource = ArchiveResource::create(utf8Buffer(markupString), responseURL, response.mimeType(), "UTF-8", frame->tree().uniqueName());
510
511    Vector<RefPtr<LegacyWebArchive>> subframeArchives;
512    Vector<RefPtr<ArchiveResource>> subresources;
513    HashSet<URL> uniqueSubresources;
514
515    size_t nodesSize = nodes.size();
516    for (size_t i = 0; i < nodesSize; ++i) {
517        Node& node = *nodes[i];
518        Frame* childFrame;
519        if ((isHTMLFrameElement(node) || isHTMLIFrameElement(node) || isHTMLObjectElement(node))
520            && (childFrame = toHTMLFrameOwnerElement(node).contentFrame())) {
521            if (frameFilter && !frameFilter(*childFrame))
522                continue;
523
524            if (RefPtr<LegacyWebArchive> subframeArchive = create(childFrame->document(), frameFilter))
525                subframeArchives.append(WTF::move(subframeArchive));
526            else
527                LOG_ERROR("Unabled to archive subframe %s", childFrame->tree().uniqueName().string().utf8().data());
528
529        } else {
530            ListHashSet<URL> subresourceURLs;
531            node.getSubresourceURLs(subresourceURLs);
532
533            DocumentLoader* documentLoader = frame->loader().documentLoader();
534
535            for (const auto& subresourceURL : subresourceURLs) {
536                if (uniqueSubresources.contains(subresourceURL))
537                    continue;
538
539                uniqueSubresources.add(subresourceURL);
540
541                if (RefPtr<ArchiveResource> resource = documentLoader->subresource(subresourceURL)) {
542                    subresources.append(WTF::move(resource));
543                    continue;
544                }
545
546                ResourceRequest request(subresourceURL);
547#if ENABLE(CACHE_PARTITIONING)
548                request.setCachePartition(frame->document()->topOrigin()->cachePartition());
549#endif
550                CachedResource* cachedResource = memoryCache()->resourceForRequest(request, frame->page()->sessionID());
551                if (cachedResource) {
552                    ResourceBuffer* data = cachedResource->resourceBuffer();
553
554                    if (RefPtr<ArchiveResource> resource = ArchiveResource::create(data ? data->sharedBuffer() : 0, subresourceURL, cachedResource->response())) {
555                        subresources.append(WTF::move(resource));
556                        continue;
557                    }
558                }
559
560                // FIXME: should do something better than spew to console here
561                LOG_ERROR("Failed to archive subresource for %s", subresourceURL.string().utf8().data());
562            }
563        }
564    }
565
566    // Add favicon if one exists for this page, if we are archiving the entire page.
567    if (nodesSize && nodes[0]->isDocumentNode() && iconDatabase().isEnabled()) {
568        const String& iconURL = iconDatabase().synchronousIconURLForPageURL(responseURL);
569        if (!iconURL.isEmpty() && iconDatabase().synchronousIconDataKnownForIconURL(iconURL)) {
570            if (Image* iconImage = iconDatabase().synchronousIconForPageURL(responseURL, IntSize(16, 16))) {
571                if (RefPtr<ArchiveResource> resource = ArchiveResource::create(iconImage->data(), URL(ParsedURLString, iconURL), "image/x-icon", "", ""))
572                    subresources.append(resource.release());
573            }
574        }
575    }
576
577    return create(mainResource.release(), subresources, WTF::move(subframeArchives));
578}
579
580PassRefPtr<LegacyWebArchive> LegacyWebArchive::createFromSelection(Frame* frame)
581{
582    if (!frame)
583        return nullptr;
584
585    Document* document = frame->document();
586    if (!document)
587        return nullptr;
588
589    StringBuilder builder;
590    builder.append(documentTypeString(*document));
591
592    Vector<Node*> nodeList;
593    RefPtr<Range> selectionRange = frame->selection().toNormalizedRange();
594    if (selectionRange)
595        builder.append(createMarkup(*selectionRange, &nodeList, AnnotateForInterchange));
596
597    String markupString = builder.toString();
598    RefPtr<LegacyWebArchive> archive = create(markupString, frame, nodeList, nullptr);
599
600    if (!document->isFrameSet())
601        return archive.release();
602
603    // Wrap the frameset document in an iframe so it can be pasted into
604    // another document (which will have a body or frameset of its own).
605    String iframeMarkup = "<iframe frameborder=\"no\" marginwidth=\"0\" marginheight=\"0\" width=\"98%%\" height=\"98%%\" src=\"" + frame->loader().documentLoader()->response().url().string() + "\"></iframe>";
606    RefPtr<ArchiveResource> iframeResource = ArchiveResource::create(utf8Buffer(iframeMarkup), blankURL(), "text/html", "UTF-8", String());
607
608    return create(iframeResource.release(), Vector<RefPtr<ArchiveResource>>(), Vector<RefPtr<LegacyWebArchive>> { archive });
609}
610
611}
612