1/* 2 * Copyright (C) 2008, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of Apple Inc. ("Apple") nor the names of 14 * its contributors may be used to endorse or promote products derived 15 * from this software without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY 18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY 21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 24 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include "config.h" 30#include "LegacyWebArchive.h" 31 32#include "CachedResource.h" 33#include "Document.h" 34#include "DocumentLoader.h" 35#include "Frame.h" 36#include "FrameLoader.h" 37#include "FrameSelection.h" 38#include "FrameTree.h" 39#include "HTMLFrameOwnerElement.h" 40#include "HTMLNames.h" 41#include "IconDatabase.h" 42#include "Image.h" 43#include "URLHash.h" 44#include "Logging.h" 45#include "MemoryCache.h" 46#include "Page.h" 47#include "Range.h" 48#include "ResourceBuffer.h" 49#include "Settings.h" 50#include "markup.h" 51#include <wtf/ListHashSet.h> 52#include <wtf/RetainPtr.h> 53#include <wtf/text/StringBuilder.h> 54#include <wtf/text/CString.h> 55 56namespace WebCore { 57 58static const CFStringRef LegacyWebArchiveMainResourceKey = CFSTR("WebMainResource"); 59static const CFStringRef LegacyWebArchiveSubresourcesKey = CFSTR("WebSubresources"); 60static const CFStringRef LegacyWebArchiveSubframeArchivesKey = CFSTR("WebSubframeArchives"); 61static const CFStringRef LegacyWebArchiveResourceDataKey = CFSTR("WebResourceData"); 62static const CFStringRef LegacyWebArchiveResourceFrameNameKey = CFSTR("WebResourceFrameName"); 63static const CFStringRef LegacyWebArchiveResourceMIMETypeKey = CFSTR("WebResourceMIMEType"); 64static const CFStringRef LegacyWebArchiveResourceURLKey = CFSTR("WebResourceURL"); 65static const CFStringRef LegacyWebArchiveResourceTextEncodingNameKey = CFSTR("WebResourceTextEncodingName"); 66static const CFStringRef LegacyWebArchiveResourceResponseKey = CFSTR("WebResourceResponse"); 67static const CFStringRef LegacyWebArchiveResourceResponseVersionKey = CFSTR("WebResourceResponseVersion"); 68 69RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(ArchiveResource* resource, MainResourceStatus isMainResource) 70{ 71 if (!resource) { 72 // The property list representation of a null/empty WebResource has the following 3 objects stored as nil. 73 // FIXME: 0 is not serializable. Presumably we need to use kCFNull here instead for compatibility. 74 // FIXME: But why do we need to support a resource of 0? Who relies on that? 75 RetainPtr<CFMutableDictionaryRef> propertyList = adoptCF(CFDictionaryCreateMutable(0, 3, 0, 0)); 76 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, 0); 77 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, 0); 78 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, 0); 79 return propertyList; 80 } 81 82 RetainPtr<CFMutableDictionaryRef> propertyList = adoptCF(CFDictionaryCreateMutable(0, 6, 0, &kCFTypeDictionaryValueCallBacks)); 83 84 // Resource data can be empty, but must be represented by an empty CFDataRef 85 SharedBuffer* data = resource->data(); 86 RetainPtr<CFDataRef> cfData; 87 if (data) 88 cfData = data->createCFData(); 89 else 90 cfData = adoptCF(CFDataCreate(0, 0, 0)); 91 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceDataKey, cfData.get()); 92 93 // Resource URL cannot be null 94 if (RetainPtr<CFStringRef> cfURL = resource->url().string().createCFString()) 95 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceURLKey, cfURL.get()); 96 else { 97 LOG(Archives, "LegacyWebArchive - NULL resource URL is invalid - returning null property list"); 98 return 0; 99 } 100 101 // FrameName should be left out if empty for subresources, but always included for main resources 102 const String& frameName(resource->frameName()); 103 if (!frameName.isEmpty() || isMainResource) 104 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceFrameNameKey, frameName.createCFString().get()); 105 106 // Set MIMEType, TextEncodingName, and ResourceResponse only if they actually exist 107 const String& mimeType(resource->mimeType()); 108 if (!mimeType.isEmpty()) 109 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceMIMETypeKey, mimeType.createCFString().get()); 110 111 const String& textEncoding(resource->textEncoding()); 112 if (!textEncoding.isEmpty()) 113 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceTextEncodingNameKey, textEncoding.createCFString().get()); 114 115 // Don't include the resource response for the main resource 116 if (!isMainResource) { 117 RetainPtr<CFDataRef> resourceResponseData = createPropertyListRepresentation(resource->response()); 118 if (resourceResponseData) 119 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveResourceResponseKey, resourceResponseData.get()); 120 } 121 122 return propertyList; 123} 124 125RetainPtr<CFDictionaryRef> LegacyWebArchive::createPropertyListRepresentation(Archive* archive) 126{ 127 RetainPtr<CFMutableDictionaryRef> propertyList = adoptCF(CFDictionaryCreateMutable(0, 3, 0, &kCFTypeDictionaryValueCallBacks)); 128 129 RetainPtr<CFDictionaryRef> mainResourceDict = createPropertyListRepresentation(archive->mainResource(), MainResource); 130 ASSERT(mainResourceDict); 131 if (!mainResourceDict) 132 return 0; 133 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveMainResourceKey, mainResourceDict.get()); 134 135 RetainPtr<CFMutableArrayRef> subresourcesArray = adoptCF(CFArrayCreateMutable(0, archive->subresources().size(), &kCFTypeArrayCallBacks)); 136 const Vector<RefPtr<ArchiveResource>>& subresources(archive->subresources()); 137 for (unsigned i = 0; i < subresources.size(); ++i) { 138 RetainPtr<CFDictionaryRef> subresource = createPropertyListRepresentation(subresources[i].get(), Subresource); 139 if (subresource) 140 CFArrayAppendValue(subresourcesArray.get(), subresource.get()); 141 else 142 LOG(Archives, "LegacyWebArchive - Failed to create property list for subresource"); 143 } 144 if (CFArrayGetCount(subresourcesArray.get())) 145 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubresourcesKey, subresourcesArray.get()); 146 147 RetainPtr<CFMutableArrayRef> subframesArray = adoptCF(CFArrayCreateMutable(0, archive->subframeArchives().size(), &kCFTypeArrayCallBacks)); 148 const Vector<RefPtr<Archive>>& subframeArchives(archive->subframeArchives()); 149 for (unsigned i = 0; i < subframeArchives.size(); ++i) { 150 RetainPtr<CFDictionaryRef> subframeArchive = createPropertyListRepresentation(subframeArchives[i].get()); 151 if (subframeArchive) 152 CFArrayAppendValue(subframesArray.get(), subframeArchive.get()); 153 else 154 LOG(Archives, "LegacyWebArchive - Failed to create property list for subframe archive"); 155 } 156 if (CFArrayGetCount(subframesArray.get())) 157 CFDictionarySetValue(propertyList.get(), LegacyWebArchiveSubframeArchivesKey, subframesArray.get()); 158 159 return propertyList; 160} 161 162ResourceResponse LegacyWebArchive::createResourceResponseFromPropertyListData(CFDataRef data, CFStringRef responseDataType) 163{ 164 ASSERT(data); 165 if (!data) 166 return ResourceResponse(); 167 168 // If the ResourceResponseVersion (passed in as responseDataType) exists at all, this is a "new" web archive that we 169 // can parse well in a cross platform manner If it doesn't exist, we will assume this is an "old" web archive with, 170 // NSURLResponse objects in it and parse the ResourceResponse as such. 171 if (!responseDataType) 172 return createResourceResponseFromMacArchivedData(data); 173 174 // FIXME: Parse the "new" format that the above comment references here. This format doesn't exist yet. 175 return ResourceResponse(); 176} 177 178PassRefPtr<ArchiveResource> LegacyWebArchive::createResource(CFDictionaryRef dictionary) 179{ 180 ASSERT(dictionary); 181 if (!dictionary) 182 return 0; 183 184 CFDataRef resourceData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceDataKey)); 185 if (resourceData && CFGetTypeID(resourceData) != CFDataGetTypeID()) { 186 LOG(Archives, "LegacyWebArchive - Resource data is not of type CFData, cannot create invalid resource"); 187 return 0; 188 } 189 190 CFStringRef frameName = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceFrameNameKey)); 191 if (frameName && CFGetTypeID(frameName) != CFStringGetTypeID()) { 192 LOG(Archives, "LegacyWebArchive - Frame name is not of type CFString, cannot create invalid resource"); 193 return 0; 194 } 195 196 CFStringRef mimeType = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceMIMETypeKey)); 197 if (mimeType && CFGetTypeID(mimeType) != CFStringGetTypeID()) { 198 LOG(Archives, "LegacyWebArchive - MIME type is not of type CFString, cannot create invalid resource"); 199 return 0; 200 } 201 202 CFStringRef url = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceURLKey)); 203 if (url && CFGetTypeID(url) != CFStringGetTypeID()) { 204 LOG(Archives, "LegacyWebArchive - URL is not of type CFString, cannot create invalid resource"); 205 return 0; 206 } 207 208 CFStringRef textEncoding = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceTextEncodingNameKey)); 209 if (textEncoding && CFGetTypeID(textEncoding) != CFStringGetTypeID()) { 210 LOG(Archives, "LegacyWebArchive - Text encoding is not of type CFString, cannot create invalid resource"); 211 return 0; 212 } 213 214 ResourceResponse response; 215 216 CFDataRef resourceResponseData = static_cast<CFDataRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseKey)); 217 if (resourceResponseData) { 218 if (CFGetTypeID(resourceResponseData) != CFDataGetTypeID()) { 219 LOG(Archives, "LegacyWebArchive - Resource response data is not of type CFData, cannot create invalid resource"); 220 return 0; 221 } 222 223 CFStringRef resourceResponseVersion = static_cast<CFStringRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveResourceResponseVersionKey)); 224 if (resourceResponseVersion && CFGetTypeID(resourceResponseVersion) != CFStringGetTypeID()) { 225 LOG(Archives, "LegacyWebArchive - Resource response version is not of type CFString, cannot create invalid resource"); 226 return 0; 227 } 228 229 response = createResourceResponseFromPropertyListData(resourceResponseData, resourceResponseVersion); 230 } 231 232 return ArchiveResource::create(SharedBuffer::wrapCFData(resourceData), URL(URL(), url), mimeType, textEncoding, frameName, response); 233} 234 235PassRefPtr<LegacyWebArchive> LegacyWebArchive::create() 236{ 237 return adoptRef(new LegacyWebArchive); 238} 239 240PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(PassRefPtr<ArchiveResource> mainResource, Vector<RefPtr<ArchiveResource>> subresources, Vector<RefPtr<LegacyWebArchive>> subframeArchives) 241{ 242 ASSERT(mainResource); 243 if (!mainResource) 244 return 0; 245 246 RefPtr<LegacyWebArchive> archive = create(); 247 archive->setMainResource(mainResource); 248 249 for (unsigned i = 0; i < subresources.size(); ++i) 250 archive->addSubresource(WTF::move(subresources[i])); 251 252 for (unsigned i = 0; i < subframeArchives.size(); ++i) 253 archive->addSubframeArchive(WTF::move(subframeArchives[i])); 254 255 return archive.release(); 256} 257 258PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(SharedBuffer* data) 259{ 260 return create(URL(), data); 261} 262 263PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const URL&, SharedBuffer* data) 264{ 265 LOG(Archives, "LegacyWebArchive - Creating from raw data"); 266 267 RefPtr<LegacyWebArchive> archive = create(); 268 269 ASSERT(data); 270 if (!data) 271 return 0; 272 273 RetainPtr<CFDataRef> cfData = data->createCFData(); 274 if (!cfData) 275 return 0; 276 277 CFErrorRef error = 0; 278 279 RetainPtr<CFDictionaryRef> plist = adoptCF(static_cast<CFDictionaryRef>(CFPropertyListCreateWithData(0, cfData.get(), kCFPropertyListImmutable, 0, &error))); 280 if (!plist) { 281#ifndef NDEBUG 282 RetainPtr<CFStringRef> errorString = error ? adoptCF(CFErrorCopyDescription(error)) : 0; 283 const char* cError = errorString ? CFStringGetCStringPtr(errorString.get(), kCFStringEncodingUTF8) : "unknown error"; 284 LOG(Archives, "LegacyWebArchive - Error parsing PropertyList from archive data - %s", cError); 285#endif 286 if (error) 287 CFRelease(error); 288 return 0; 289 } 290 291 if (CFGetTypeID(plist.get()) != CFDictionaryGetTypeID()) { 292 LOG(Archives, "LegacyWebArchive - Archive property list is not the expected CFDictionary, aborting invalid WebArchive"); 293 return 0; 294 } 295 296 if (!archive->extract(plist.get())) 297 return 0; 298 299 return archive.release(); 300} 301 302bool LegacyWebArchive::extract(CFDictionaryRef dictionary) 303{ 304 ASSERT(dictionary); 305 if (!dictionary) { 306 LOG(Archives, "LegacyWebArchive - Null root CFDictionary, aborting invalid WebArchive"); 307 return false; 308 } 309 310 CFDictionaryRef mainResourceDict = static_cast<CFDictionaryRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveMainResourceKey)); 311 if (!mainResourceDict) { 312 LOG(Archives, "LegacyWebArchive - No main resource in archive, aborting invalid WebArchive"); 313 return false; 314 } 315 if (CFGetTypeID(mainResourceDict) != CFDictionaryGetTypeID()) { 316 LOG(Archives, "LegacyWebArchive - Main resource is not the expected CFDictionary, aborting invalid WebArchive"); 317 return false; 318 } 319 320 setMainResource(createResource(mainResourceDict)); 321 if (!mainResource()) { 322 LOG(Archives, "LegacyWebArchive - Failed to parse main resource from CFDictionary or main resource does not exist, aborting invalid WebArchive"); 323 return false; 324 } 325 326 if (mainResource()->mimeType().isNull()) { 327 LOG(Archives, "LegacyWebArchive - Main resource MIME type is required, but was null."); 328 return false; 329 } 330 331 CFArrayRef subresourceArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubresourcesKey)); 332 if (subresourceArray && CFGetTypeID(subresourceArray) != CFArrayGetTypeID()) { 333 LOG(Archives, "LegacyWebArchive - Subresources is not the expected Array, aborting invalid WebArchive"); 334 return false; 335 } 336 337 if (subresourceArray) { 338 CFIndex count = CFArrayGetCount(subresourceArray); 339 for (CFIndex i = 0; i < count; ++i) { 340 CFDictionaryRef subresourceDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subresourceArray, i)); 341 if (CFGetTypeID(subresourceDict) != CFDictionaryGetTypeID()) { 342 LOG(Archives, "LegacyWebArchive - Subresource is not expected CFDictionary, aborting invalid WebArchive"); 343 return false; 344 } 345 346 if (RefPtr<ArchiveResource> subresource = createResource(subresourceDict)) 347 addSubresource(subresource.release()); 348 } 349 } 350 351 CFArrayRef subframeArray = static_cast<CFArrayRef>(CFDictionaryGetValue(dictionary, LegacyWebArchiveSubframeArchivesKey)); 352 if (subframeArray && CFGetTypeID(subframeArray) != CFArrayGetTypeID()) { 353 LOG(Archives, "LegacyWebArchive - Subframe archives is not the expected Array, aborting invalid WebArchive"); 354 return false; 355 } 356 357 if (subframeArray) { 358 CFIndex count = CFArrayGetCount(subframeArray); 359 for (CFIndex i = 0; i < count; ++i) { 360 CFDictionaryRef subframeDict = static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(subframeArray, i)); 361 if (CFGetTypeID(subframeDict) != CFDictionaryGetTypeID()) { 362 LOG(Archives, "LegacyWebArchive - Subframe array is not expected CFDictionary, aborting invalid WebArchive"); 363 return false; 364 } 365 366 RefPtr<LegacyWebArchive> subframeArchive = create(); 367 if (subframeArchive->extract(subframeDict)) 368 addSubframeArchive(subframeArchive.release()); 369 else 370 LOG(Archives, "LegacyWebArchive - Invalid subframe archive skipped"); 371 } 372 } 373 374 return true; 375} 376 377Archive::Type LegacyWebArchive::type() const 378{ 379 return Archive::WebArchive; 380} 381 382RetainPtr<CFDataRef> LegacyWebArchive::rawDataRepresentation() 383{ 384 RetainPtr<CFDictionaryRef> propertyList = createPropertyListRepresentation(this); 385 ASSERT(propertyList); 386 if (!propertyList) { 387 LOG(Archives, "LegacyWebArchive - Failed to create property list for archive, returning no data"); 388 return 0; 389 } 390 391 RetainPtr<CFWriteStreamRef> stream = adoptCF(CFWriteStreamCreateWithAllocatedBuffers(0, 0)); 392 393 CFWriteStreamOpen(stream.get()); 394 CFPropertyListWrite(propertyList.get(), stream.get(), kCFPropertyListBinaryFormat_v1_0, 0, 0); 395 396 RetainPtr<CFDataRef> plistData = adoptCF(static_cast<CFDataRef>(CFWriteStreamCopyProperty(stream.get(), kCFStreamPropertyDataWritten))); 397 ASSERT(plistData); 398 399 CFWriteStreamClose(stream.get()); 400 401 if (!plistData) { 402 LOG(Archives, "LegacyWebArchive - Failed to convert property list into raw data, returning no data"); 403 return 0; 404 } 405 406 return plistData; 407} 408 409#if !PLATFORM(COCOA) 410 411ResourceResponse LegacyWebArchive::createResourceResponseFromMacArchivedData(CFDataRef responseData) 412{ 413 // FIXME: If is is possible to parse in a serialized NSURLResponse manually, without using 414 // NSKeyedUnarchiver, manipulating plists directly, then we want to do that here. 415 // Until then, this can be done on Mac only. 416 return ResourceResponse(); 417} 418 419RetainPtr<CFDataRef> LegacyWebArchive::createPropertyListRepresentation(const ResourceResponse& response) 420{ 421 // FIXME: Write out the "new" format described in createResourceResponseFromPropertyListData once we invent it. 422 return 0; 423} 424 425#endif 426 427PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Node* node, std::function<bool (Frame&)> frameFilter) 428{ 429 ASSERT(node); 430 if (!node) 431 return create(); 432 433 Frame* frame = node->document().frame(); 434 if (!frame) 435 return create(); 436 437 // If the page was loaded with javascript enabled, we don't want to archive <noscript> tags 438 // In practice we don't actually know whether scripting was enabled when the page was originally loaded 439 // but we can approximate that by checking if scripting is enabled right now. 440 OwnPtr<Vector<QualifiedName>> tagNamesToFilter; 441 if (frame->page() && frame->page()->settings().isScriptEnabled()) { 442 tagNamesToFilter = adoptPtr(new Vector<QualifiedName>); 443 tagNamesToFilter->append(HTMLNames::noscriptTag); 444 } 445 446 Vector<Node*> nodeList; 447 String markupString = createMarkup(*node, IncludeNode, &nodeList, DoNotResolveURLs, tagNamesToFilter.get()); 448 Node::NodeType nodeType = node->nodeType(); 449 if (nodeType != Node::DOCUMENT_NODE && nodeType != Node::DOCUMENT_TYPE_NODE) 450 markupString = documentTypeString(node->document()) + markupString; 451 452 return create(markupString, frame, nodeList, WTF::move(frameFilter)); 453} 454 455PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Frame* frame) 456{ 457 ASSERT(frame); 458 459 DocumentLoader* documentLoader = frame->loader().documentLoader(); 460 461 if (!documentLoader) 462 return 0; 463 464 Vector<RefPtr<LegacyWebArchive>> subframeArchives; 465 466 for (unsigned i = 0; i < frame->tree().childCount(); ++i) { 467 if (RefPtr<LegacyWebArchive> childFrameArchive = create(frame->tree().child(i))) 468 subframeArchives.append(WTF::move(childFrameArchive)); 469 } 470 471 auto subresources = documentLoader->subresources(); 472 return create(documentLoader->mainResource(), WTF::move(subresources), WTF::move(subframeArchives)); 473} 474 475PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(Range* range) 476{ 477 if (!range) 478 return nullptr; 479 480 Node* startContainer = range->startContainer(); 481 if (!startContainer) 482 return nullptr; 483 484 Document& document = startContainer->document(); 485 486 Frame* frame = document.frame(); 487 if (!frame) 488 return nullptr; 489 490 // FIXME: This is always "for interchange". Is that right? See the previous method. 491 Vector<Node*> nodeList; 492 String markupString = documentTypeString(document) + createMarkup(*range, &nodeList, AnnotateForInterchange); 493 494 return create(markupString, frame, nodeList, nullptr); 495} 496 497PassRefPtr<LegacyWebArchive> LegacyWebArchive::create(const String& markupString, Frame* frame, const Vector<Node*>& nodes, std::function<bool (Frame&)> frameFilter) 498{ 499 ASSERT(frame); 500 501 const ResourceResponse& response = frame->loader().documentLoader()->response(); 502 URL responseURL = response.url(); 503 504 // it's possible to have a response without a URL here 505 // <rdar://problem/5454935> 506 if (responseURL.isNull()) 507 responseURL = URL(ParsedURLString, emptyString()); 508 509 RefPtr<ArchiveResource> mainResource = ArchiveResource::create(utf8Buffer(markupString), responseURL, response.mimeType(), "UTF-8", frame->tree().uniqueName()); 510 511 Vector<RefPtr<LegacyWebArchive>> subframeArchives; 512 Vector<RefPtr<ArchiveResource>> subresources; 513 HashSet<URL> uniqueSubresources; 514 515 size_t nodesSize = nodes.size(); 516 for (size_t i = 0; i < nodesSize; ++i) { 517 Node& node = *nodes[i]; 518 Frame* childFrame; 519 if ((isHTMLFrameElement(node) || isHTMLIFrameElement(node) || isHTMLObjectElement(node)) 520 && (childFrame = toHTMLFrameOwnerElement(node).contentFrame())) { 521 if (frameFilter && !frameFilter(*childFrame)) 522 continue; 523 524 if (RefPtr<LegacyWebArchive> subframeArchive = create(childFrame->document(), frameFilter)) 525 subframeArchives.append(WTF::move(subframeArchive)); 526 else 527 LOG_ERROR("Unabled to archive subframe %s", childFrame->tree().uniqueName().string().utf8().data()); 528 529 } else { 530 ListHashSet<URL> subresourceURLs; 531 node.getSubresourceURLs(subresourceURLs); 532 533 DocumentLoader* documentLoader = frame->loader().documentLoader(); 534 535 for (const auto& subresourceURL : subresourceURLs) { 536 if (uniqueSubresources.contains(subresourceURL)) 537 continue; 538 539 uniqueSubresources.add(subresourceURL); 540 541 if (RefPtr<ArchiveResource> resource = documentLoader->subresource(subresourceURL)) { 542 subresources.append(WTF::move(resource)); 543 continue; 544 } 545 546 ResourceRequest request(subresourceURL); 547#if ENABLE(CACHE_PARTITIONING) 548 request.setCachePartition(frame->document()->topOrigin()->cachePartition()); 549#endif 550 CachedResource* cachedResource = memoryCache()->resourceForRequest(request, frame->page()->sessionID()); 551 if (cachedResource) { 552 ResourceBuffer* data = cachedResource->resourceBuffer(); 553 554 if (RefPtr<ArchiveResource> resource = ArchiveResource::create(data ? data->sharedBuffer() : 0, subresourceURL, cachedResource->response())) { 555 subresources.append(WTF::move(resource)); 556 continue; 557 } 558 } 559 560 // FIXME: should do something better than spew to console here 561 LOG_ERROR("Failed to archive subresource for %s", subresourceURL.string().utf8().data()); 562 } 563 } 564 } 565 566 // Add favicon if one exists for this page, if we are archiving the entire page. 567 if (nodesSize && nodes[0]->isDocumentNode() && iconDatabase().isEnabled()) { 568 const String& iconURL = iconDatabase().synchronousIconURLForPageURL(responseURL); 569 if (!iconURL.isEmpty() && iconDatabase().synchronousIconDataKnownForIconURL(iconURL)) { 570 if (Image* iconImage = iconDatabase().synchronousIconForPageURL(responseURL, IntSize(16, 16))) { 571 if (RefPtr<ArchiveResource> resource = ArchiveResource::create(iconImage->data(), URL(ParsedURLString, iconURL), "image/x-icon", "", "")) 572 subresources.append(resource.release()); 573 } 574 } 575 } 576 577 return create(mainResource.release(), subresources, WTF::move(subframeArchives)); 578} 579 580PassRefPtr<LegacyWebArchive> LegacyWebArchive::createFromSelection(Frame* frame) 581{ 582 if (!frame) 583 return nullptr; 584 585 Document* document = frame->document(); 586 if (!document) 587 return nullptr; 588 589 StringBuilder builder; 590 builder.append(documentTypeString(*document)); 591 592 Vector<Node*> nodeList; 593 RefPtr<Range> selectionRange = frame->selection().toNormalizedRange(); 594 if (selectionRange) 595 builder.append(createMarkup(*selectionRange, &nodeList, AnnotateForInterchange)); 596 597 String markupString = builder.toString(); 598 RefPtr<LegacyWebArchive> archive = create(markupString, frame, nodeList, nullptr); 599 600 if (!document->isFrameSet()) 601 return archive.release(); 602 603 // Wrap the frameset document in an iframe so it can be pasted into 604 // another document (which will have a body or frameset of its own). 605 String iframeMarkup = "<iframe frameborder=\"no\" marginwidth=\"0\" marginheight=\"0\" width=\"98%%\" height=\"98%%\" src=\"" + frame->loader().documentLoader()->response().url().string() + "\"></iframe>"; 606 RefPtr<ArchiveResource> iframeResource = ArchiveResource::create(utf8Buffer(iframeMarkup), blankURL(), "text/html", "UTF-8", String()); 607 608 return create(iframeResource.release(), Vector<RefPtr<ArchiveResource>>(), Vector<RefPtr<LegacyWebArchive>> { archive }); 609} 610 611} 612