1/*
2 * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2011 Google Inc. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1.  Redistributions of source code must retain the above copyright
10 *     notice, this list of conditions and the following disclaimer.
11 * 2.  Redistributions in binary form must reproduce the above copyright
12 *     notice, this list of conditions and the following disclaimer in the
13 *     documentation and/or other materials provided with the distribution.
14 * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
15 *     its contributors may be used to endorse or promote products derived
16 *     from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
19 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
22 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30#ifndef DocumentLoader_h
31#define DocumentLoader_h
32
33#include "CachedRawResourceClient.h"
34#include "CachedResourceHandle.h"
35#include "DocumentLoadTiming.h"
36#include "DocumentWriter.h"
37#include "IconDatabaseBase.h"
38#include "NavigationAction.h"
39#include "ResourceError.h"
40#include "ResourceLoaderOptions.h"
41#include "ResourceRequest.h"
42#include "ResourceResponse.h"
43#include "StringWithDirection.h"
44#include "SubstituteData.h"
45#include "Timer.h"
46#include <wtf/HashSet.h>
47#include <wtf/RefPtr.h>
48#include <wtf/Vector.h>
49
50#if HAVE(RUNLOOP_TIMER)
51#include <wtf/RunLoopTimer.h>
52#endif
53
54namespace WTF {
55class SchedulePair;
56}
57
58namespace WebCore {
59    class ApplicationCacheHost;
60#if ENABLE(WEB_ARCHIVE) || ENABLE(MHTML)
61    class Archive;
62#endif
63    class ArchiveResource;
64    class ArchiveResourceCollection;
65    class CachedRawResource;
66    class CachedResourceLoader;
67    class ContentFilter;
68    class FormState;
69    class Frame;
70    class FrameLoader;
71    class Page;
72    class ResourceBuffer;
73    class ResourceLoader;
74    class SharedBuffer;
75    class SubstituteResource;
76
    // Unordered set of resource loaders; each entry holds a RefPtr to its loader.
    typedef HashSet<RefPtr<ResourceLoader> > ResourceLoaderSet;
    // Sequence of ResourceResponse values (see DocumentLoader::responses()).
    typedef Vector<ResourceResponse> ResponseVector;
79
80    class DocumentLoader : public RefCounted<DocumentLoader>, private CachedRawResourceClient {
81        WTF_MAKE_FAST_ALLOCATED;
82    public:
83        static PassRefPtr<DocumentLoader> create(const ResourceRequest& request, const SubstituteData& data)
84        {
85            return adoptRef(new DocumentLoader(request, data));
86        }
87        virtual ~DocumentLoader();
88
89        void setFrame(Frame*);
90        Frame* frame() const { return m_frame; }
91
92        virtual void attachToFrame();
93        virtual void detachFromFrame();
94
95        FrameLoader* frameLoader() const;
96        ResourceLoader* mainResourceLoader() const;
97        PassRefPtr<ResourceBuffer> mainResourceData() const;
98
99        DocumentWriter* writer() const { return &m_writer; }
100
101        const ResourceRequest& originalRequest() const;
102        const ResourceRequest& originalRequestCopy() const;
103
104        const ResourceRequest& request() const;
105        ResourceRequest& request();
106
107        CachedResourceLoader* cachedResourceLoader() const { return m_cachedResourceLoader.get(); }
108
109        const SubstituteData& substituteData() const { return m_substituteData; }
110
111        // FIXME: This is the same as requestURL(). We should remove one of them.
112        const KURL& url() const;
113        const KURL& unreachableURL() const;
114
115        const KURL& originalURL() const;
116        const KURL& requestURL() const;
117        const KURL& responseURL() const;
118        const String& responseMIMEType() const;
119
120        void replaceRequestURLForSameDocumentNavigation(const KURL&);
121        bool isStopping() const { return m_isStopping; }
122        void stopLoading();
123        void setCommitted(bool committed) { m_committed = committed; }
124        bool isCommitted() const { return m_committed; }
125        bool isLoading() const;
126        const ResourceResponse& response() const { return m_response; }
127        const ResourceError& mainDocumentError() const { return m_mainDocumentError; }
128        bool isClientRedirect() const { return m_isClientRedirect; }
129        void setIsClientRedirect(bool isClientRedirect) { m_isClientRedirect = isClientRedirect; }
130        void handledOnloadEvents();
131        bool wasOnloadHandled() { return m_wasOnloadHandled; }
132        bool isLoadingInAPISense() const;
133        void setTitle(const StringWithDirection&);
134        const String& overrideEncoding() const { return m_overrideEncoding; }
135
136#if PLATFORM(MAC)
137        void schedule(WTF::SchedulePair*);
138        void unschedule(WTF::SchedulePair*);
139#endif
140
141#if ENABLE(WEB_ARCHIVE) || ENABLE(MHTML)
142        void setArchive(PassRefPtr<Archive>);
143        void addAllArchiveResources(Archive*);
144        void addArchiveResource(PassRefPtr<ArchiveResource>);
145        PassRefPtr<Archive> popArchiveForSubframe(const String& frameName, const KURL&);
146        SharedBuffer* parsedArchiveData() const;
147
148        bool scheduleArchiveLoad(ResourceLoader*, const ResourceRequest&);
149#endif // ENABLE(WEB_ARCHIVE) || ENABLE(MHTML)
150
151        // Return the ArchiveResource for the URL only when loading an Archive
152        ArchiveResource* archiveResourceForURL(const KURL&) const;
153
154        PassRefPtr<ArchiveResource> mainResource() const;
155
156        // Return an ArchiveResource for the URL, either creating from live data or
157        // pulling from the ArchiveResourceCollection
158        PassRefPtr<ArchiveResource> subresource(const KURL&) const;
159        void getSubresources(Vector<PassRefPtr<ArchiveResource> >&) const;
160
161
162#ifndef NDEBUG
163        bool isSubstituteLoadPending(ResourceLoader*) const;
164#endif
165        void cancelPendingSubstituteLoad(ResourceLoader*);
166
167        void addResponse(const ResourceResponse&);
168        const ResponseVector& responses() const { return m_responses; }
169
170        const NavigationAction& triggeringAction() const { return m_triggeringAction; }
171        void setTriggeringAction(const NavigationAction& action) { m_triggeringAction = action; }
172        void setOverrideEncoding(const String& encoding) { m_overrideEncoding = encoding; }
173        void setLastCheckedRequest(const ResourceRequest& request) { m_lastCheckedRequest = request; }
174        const ResourceRequest& lastCheckedRequest()  { return m_lastCheckedRequest; }
175
176        void stopRecordingResponses();
177        const StringWithDirection& title() const { return m_pageTitle; }
178
179        KURL urlForHistory() const;
180        bool urlForHistoryReflectsFailure() const;
181
182        // These accessors accommodate WebCore's somewhat fickle custom of creating history
183        // items for redirects, but only sometimes. For "source" and "destination",
184        // these accessors return the URL that would have been used if a history
185        // item were created. This allows WebKit to link history items reflecting
186        // redirects into a chain from start to finish.
187        String clientRedirectSourceForHistory() const { return m_clientRedirectSourceForHistory; } // null if no client redirect occurred.
188        String clientRedirectDestinationForHistory() const { return urlForHistory(); }
189        void setClientRedirectSourceForHistory(const String& clientRedirectSourceForHistory) { m_clientRedirectSourceForHistory = clientRedirectSourceForHistory; }
190
191        String serverRedirectSourceForHistory() const { return (urlForHistory() == url() || url() == blankURL()) ? String() : urlForHistory().string(); } // null if no server redirect occurred.
192        String serverRedirectDestinationForHistory() const { return url(); }
193
194        bool didCreateGlobalHistoryEntry() const { return m_didCreateGlobalHistoryEntry; }
195        void setDidCreateGlobalHistoryEntry(bool didCreateGlobalHistoryEntry) { m_didCreateGlobalHistoryEntry = didCreateGlobalHistoryEntry; }
196
197        void setDefersLoading(bool);
198        void setMainResourceDataBufferingPolicy(DataBufferingPolicy);
199
200        void startLoadingMainResource();
201        void cancelMainResourceLoad(const ResourceError&);
202
203        // Support iconDatabase in synchronous mode.
204        void iconLoadDecisionAvailable();
205
206        // Support iconDatabase in asynchronous mode.
207        void continueIconLoadWithDecision(IconLoadDecision);
208        void getIconLoadDecisionForIconURL(const String&);
209        void getIconDataForIconURL(const String&);
210
211        bool isLoadingMainResource() const { return m_loadingMainResource; }
212        bool isLoadingMultipartContent() const { return m_isLoadingMultipartContent; }
213
214        void stopLoadingPlugIns();
215        void stopLoadingSubresources();
216
217        void addSubresourceLoader(ResourceLoader*);
218        void removeSubresourceLoader(ResourceLoader*);
219        void addPlugInStreamLoader(ResourceLoader*);
220        void removePlugInStreamLoader(ResourceLoader*);
221
222        void subresourceLoaderFinishedLoadingOnePart(ResourceLoader*);
223
224        void setDeferMainResourceDataLoad(bool defer) { m_deferMainResourceDataLoad = defer; }
225
226        void didTellClientAboutLoad(const String& url)
227        {
228#if !PLATFORM(MAC)
229            // Don't include data urls here, as if a lot of data is loaded
230            // that way, we hold on to the (large) url string for too long.
231            if (protocolIs(url, "data"))
232                return;
233#endif
234            if (!url.isEmpty())
235                m_resourcesClientKnowsAbout.add(url);
236        }
237        bool haveToldClientAboutLoad(const String& url) { return m_resourcesClientKnowsAbout.contains(url); }
238        void recordMemoryCacheLoadForFutureClientNotification(const ResourceRequest&);
239        void takeMemoryCacheLoadsForClientNotification(Vector<ResourceRequest>& loads);
240
241        DocumentLoadTiming* timing() { return &m_documentLoadTiming; }
242        void resetTiming() { m_documentLoadTiming = DocumentLoadTiming(); }
243
244        // The WebKit layer calls this function when it's ready for the data to
245        // actually be added to the document.
246        void commitData(const char* bytes, size_t length);
247
248        ApplicationCacheHost* applicationCacheHost() const { return m_applicationCacheHost.get(); }
249
250        void checkLoadComplete();
251
252    protected:
253        DocumentLoader(const ResourceRequest&, const SubstituteData&);
254
255        bool m_deferMainResourceDataLoad;
256
257    private:
258
259        // The URL of the document resulting from this DocumentLoader.
260        KURL documentURL() const;
261        Document* document() const;
262
263        void setRequest(const ResourceRequest&);
264
265        void commitIfReady();
266        void setMainDocumentError(const ResourceError&);
267        void commitLoad(const char*, int);
268        void clearMainResourceLoader();
269
270        void setupForReplace();
271        void maybeFinishLoadingMultipartContent();
272
273        bool maybeCreateArchive();
274#if ENABLE(WEB_ARCHIVE) || ENABLE(MHTML)
275        void clearArchiveResources();
276#endif
277
278        void willSendRequest(ResourceRequest&, const ResourceResponse&);
279        void finishedLoading(double finishTime);
280        void mainReceivedError(const ResourceError&);
281        virtual void redirectReceived(CachedResource*, ResourceRequest&, const ResourceResponse&) OVERRIDE;
282        virtual void responseReceived(CachedResource*, const ResourceResponse&) OVERRIDE;
283        virtual void dataReceived(CachedResource*, const char* data, int length) OVERRIDE;
284        virtual void notifyFinished(CachedResource*) OVERRIDE;
285
286        bool maybeLoadEmpty();
287
288        bool isMultipartReplacingLoad() const;
289        bool isPostOrRedirectAfterPost(const ResourceRequest&, const ResourceResponse&);
290
291        static void callContinueAfterNavigationPolicy(void*, const ResourceRequest&, PassRefPtr<FormState>, bool shouldContinue);
292        void continueAfterNavigationPolicy(const ResourceRequest&, bool shouldContinue);
293
294        static void callContinueAfterContentPolicy(void*, PolicyAction);
295        void continueAfterContentPolicy(PolicyAction);
296
297        void stopLoadingForPolicyChange();
298        ResourceError interruptedForPolicyChangeError() const;
299
300#if HAVE(RUNLOOP_TIMER)
301        typedef RunLoopTimer<DocumentLoader> DocumentLoaderTimer;
302#else
303        typedef Timer<DocumentLoader> DocumentLoaderTimer;
304#endif
305        void handleSubstituteDataLoadSoon();
306        void handleSubstituteDataLoadNow(DocumentLoaderTimer*);
307        void startDataLoadTimer();
308
309        void deliverSubstituteResourcesAfterDelay();
310        void substituteResourceDeliveryTimerFired(Timer<DocumentLoader>*);
311
312        void clearMainResource();
313
314        Frame* m_frame;
315        RefPtr<CachedResourceLoader> m_cachedResourceLoader;
316
317        CachedResourceHandle<CachedRawResource> m_mainResource;
318        ResourceLoaderSet m_subresourceLoaders;
319        ResourceLoaderSet m_multipartSubresourceLoaders;
320        ResourceLoaderSet m_plugInStreamLoaders;
321
322        mutable DocumentWriter m_writer;
323
324        // A reference to actual request used to create the data source.
325        // This should only be used by the resourceLoadDelegate's
326        // identifierForInitialRequest:fromDatasource: method. It is
327        // not guaranteed to remain unchanged, as requests are mutable.
328        ResourceRequest m_originalRequest;
329
330        SubstituteData m_substituteData;
331
332        // A copy of the original request used to create the data source.
333        // We have to copy the request because requests are mutable.
334        ResourceRequest m_originalRequestCopy;
335
336        // The 'working' request. It may be mutated
337        // several times from the original request to include additional
338        // headers, cookie information, canonicalization and redirects.
339        ResourceRequest m_request;
340
341        ResourceResponse m_response;
342
343        ResourceError m_mainDocumentError;
344
345        bool m_originalSubstituteDataWasValid;
346        bool m_committed;
347        bool m_isStopping;
348        bool m_gotFirstByte;
349        bool m_isClientRedirect;
350        bool m_isLoadingMultipartContent;
351
352        // FIXME: Document::m_processingLoadEvent and DocumentLoader::m_wasOnloadHandled are roughly the same
353        // and should be merged.
354        bool m_wasOnloadHandled;
355
356        StringWithDirection m_pageTitle;
357
358        String m_overrideEncoding;
359
360        // The action that triggered loading - we keep this around for the
361        // benefit of the various policy handlers.
362        NavigationAction m_triggeringAction;
363
364        // The last request that we checked click policy for - kept around
365        // so we can avoid asking again needlessly.
366        ResourceRequest m_lastCheckedRequest;
367
368        // We retain all the received responses so we can play back the
369        // WebResourceLoadDelegate messages if the item is loaded from the
370        // page cache.
371        ResponseVector m_responses;
372        bool m_stopRecordingResponses;
373
374        typedef HashMap<RefPtr<ResourceLoader>, RefPtr<SubstituteResource> > SubstituteResourceMap;
375        SubstituteResourceMap m_pendingSubstituteResources;
376        Timer<DocumentLoader> m_substituteResourceDeliveryTimer;
377
378        OwnPtr<ArchiveResourceCollection> m_archiveResourceCollection;
379#if ENABLE(WEB_ARCHIVE) || ENABLE(MHTML)
380        RefPtr<Archive> m_archive;
381        RefPtr<SharedBuffer> m_parsedArchiveData;
382#endif
383
384        HashSet<String> m_resourcesClientKnowsAbout;
385        Vector<ResourceRequest> m_resourcesLoadedFromMemoryCacheForClientNotification;
386
387        String m_clientRedirectSourceForHistory;
388        bool m_didCreateGlobalHistoryEntry;
389
390        bool m_loadingMainResource;
391        DocumentLoadTiming m_documentLoadTiming;
392
393        double m_timeOfLastDataReceived;
394        unsigned long m_identifierForLoadWithoutResourceLoader;
395
396        DocumentLoaderTimer m_dataLoadTimer;
397        bool m_waitingForContentPolicy;
398
399        RefPtr<IconLoadDecisionCallback> m_iconLoadDecisionCallback;
400        RefPtr<IconDataCallback> m_iconDataCallback;
401
402        friend class ApplicationCacheHost;  // for substitute resource delivery
403        OwnPtr<ApplicationCacheHost> m_applicationCacheHost;
404
405#if USE(CONTENT_FILTERING)
406        RefPtr<ContentFilter> m_contentFilter;
407#endif
408    };
409
410    inline void DocumentLoader::recordMemoryCacheLoadForFutureClientNotification(const ResourceRequest& request)
411    {
412        m_resourcesLoadedFromMemoryCacheForClientNotification.append(request);
413    }
414
415    inline void DocumentLoader::takeMemoryCacheLoadsForClientNotification(Vector<ResourceRequest>& loadsSet)
416    {
417        loadsSet.swap(m_resourcesLoadedFromMemoryCacheForClientNotification);
418        m_resourcesLoadedFromMemoryCacheForClientNotification.clear();
419    }
420
421}
422
423#endif // DocumentLoader_h
424