1/*
2 * Copyright (C) 2008, 2014 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google Inc. All Rights Reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "config.h"
29#include "HTMLPreloadScanner.h"
30
31#include "HTMLNames.h"
32#include "HTMLParserIdioms.h"
33#include "HTMLSrcsetParser.h"
34#include "HTMLTokenizer.h"
35#include "InputTypeNames.h"
36#include "LinkRelAttribute.h"
37#include "SourceSizeList.h"
38#include <wtf/MainThread.h>
39
40namespace WebCore {
41
42using namespace HTMLNames;
43
44TokenPreloadScanner::TagId TokenPreloadScanner::tagIdFor(const HTMLToken::DataVector& data)
45{
46    AtomicString tagName(data);
47    if (tagName == imgTag)
48        return TagId::Img;
49    if (tagName == inputTag)
50        return TagId::Input;
51    if (tagName == linkTag)
52        return TagId::Link;
53    if (tagName == scriptTag)
54        return TagId::Script;
55    if (tagName == styleTag)
56        return TagId::Style;
57    if (tagName == baseTag)
58        return TagId::Base;
59    if (tagName == templateTag)
60        return TagId::Template;
61    return TagId::Unknown;
62}
63
64String TokenPreloadScanner::initiatorFor(TagId tagId)
65{
66    switch (tagId) {
67    case TagId::Img:
68        return "img";
69    case TagId::Input:
70        return "input";
71    case TagId::Link:
72        return "link";
73    case TagId::Script:
74        return "script";
75    case TagId::Unknown:
76    case TagId::Style:
77    case TagId::Base:
78    case TagId::Template:
79        ASSERT_NOT_REACHED();
80        return "unknown";
81    }
82    ASSERT_NOT_REACHED();
83    return "unknown";
84}
85
86class TokenPreloadScanner::StartTagScanner {
87public:
88    explicit StartTagScanner(TagId tagId, float deviceScaleFactor = 1.0)
89        : m_tagId(tagId)
90        , m_linkIsStyleSheet(false)
91        , m_inputIsImage(false)
92        , m_deviceScaleFactor(deviceScaleFactor)
93    {
94    }
95
96    void processAttributes(const HTMLToken::AttributeList& attributes
97#if ENABLE(PICTURE_SIZES)
98        , RenderView* view, Frame* frame
99#endif
100        )
101    {
102        ASSERT(isMainThread());
103        if (m_tagId >= TagId::Unknown)
104            return;
105        for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
106            AtomicString attributeName(iter->name);
107            String attributeValue = StringImpl::create8BitIfPossible(iter->value);
108            processAttribute(attributeName, attributeValue);
109        }
110
111        // Resolve between src and srcSet if we have them.
112        if (!m_srcSetAttribute.isEmpty()) {
113            ImageCandidate imageCandidate = bestFitSourceForImageAttributes(m_deviceScaleFactor, m_urlToLoad, m_srcSetAttribute
114#if ENABLE(PICTURE_SIZES)
115                , SourceSizeList::parseSizesAttribute(m_sizesAttribute, view, frame)
116#endif
117                );
118            setUrlToLoad(imageCandidate.string.toString(), true);
119        }
120    }
121
122    std::unique_ptr<PreloadRequest> createPreloadRequest(const URL& predictedBaseURL)
123    {
124        if (!shouldPreload())
125            return nullptr;
126
127        auto request = std::make_unique<PreloadRequest>(initiatorFor(m_tagId), m_urlToLoad, predictedBaseURL, resourceType(), m_mediaAttribute);
128
129        request->setCrossOriginModeAllowsCookies(crossOriginModeAllowsCookies());
130        request->setCharset(charset());
131        return request;
132    }
133
134    static bool match(const AtomicString& name, const QualifiedName& qName)
135    {
136        ASSERT(isMainThread());
137        return qName.localName() == name;
138    }
139
140private:
141    template<typename NameType>
142    void processAttribute(const NameType& attributeName, const String& attributeValue)
143    {
144        if (match(attributeName, charsetAttr))
145            m_charset = attributeValue;
146
147        if (m_tagId == TagId::Script || m_tagId == TagId::Img) {
148            if (match(attributeName, srcAttr))
149                setUrlToLoad(attributeValue);
150            else if (match(attributeName, srcsetAttr) && m_srcSetAttribute.isNull())
151                m_srcSetAttribute = attributeValue;
152#if ENABLE(PICTURE_SIZES)
153            else if (match(attributeName, sizesAttr) && m_sizesAttribute.isNull())
154                m_sizesAttribute = attributeValue;
155#endif
156            else if (match(attributeName, crossoriginAttr) && !attributeValue.isNull())
157                m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue);
158        } else if (m_tagId == TagId::Link) {
159            if (match(attributeName, hrefAttr))
160                setUrlToLoad(attributeValue);
161            else if (match(attributeName, relAttr))
162                m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
163            else if (match(attributeName, mediaAttr))
164                m_mediaAttribute = attributeValue;
165        } else if (m_tagId == TagId::Input) {
166            if (match(attributeName, srcAttr))
167                setUrlToLoad(attributeValue);
168            else if (match(attributeName, typeAttr))
169                m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image());
170        }
171    }
172
173    static bool relAttributeIsStyleSheet(const String& attributeValue)
174    {
175        LinkRelAttribute rel(attributeValue);
176        return rel.m_isStyleSheet && !rel.m_isAlternate && rel.m_iconType == InvalidIcon && !rel.m_isDNSPrefetch;
177    }
178
179    void setUrlToLoad(const String& value, bool allowReplacement = false)
180    {
181        // We only respect the first src/href, per HTML5:
182        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
183        if (!allowReplacement && !m_urlToLoad.isEmpty())
184            return;
185        String url = stripLeadingAndTrailingHTMLSpaces(value);
186        if (url.isEmpty())
187            return;
188        m_urlToLoad = url;
189    }
190
191    const String& charset() const
192    {
193        // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
194        if (m_tagId == TagId::Img)
195            return emptyString();
196        return m_charset;
197    }
198
199    CachedResource::Type resourceType() const
200    {
201        if (m_tagId == TagId::Script)
202            return CachedResource::Script;
203        if (m_tagId == TagId::Img || (m_tagId == TagId::Input && m_inputIsImage))
204            return CachedResource::ImageResource;
205        if (m_tagId == TagId::Link && m_linkIsStyleSheet)
206            return CachedResource::CSSStyleSheet;
207        ASSERT_NOT_REACHED();
208        return CachedResource::RawResource;
209    }
210
211    bool shouldPreload()
212    {
213        if (m_urlToLoad.isEmpty())
214            return false;
215
216        if (m_tagId == TagId::Link && !m_linkIsStyleSheet)
217            return false;
218
219        if (m_tagId == TagId::Input && !m_inputIsImage)
220            return false;
221
222        return true;
223    }
224
225    bool crossOriginModeAllowsCookies()
226    {
227        return m_crossOriginMode.isNull() || equalIgnoringCase(m_crossOriginMode, "use-credentials");
228    }
229
230    TagId m_tagId;
231    String m_urlToLoad;
232    String m_srcSetAttribute;
233#if ENABLE(PICTURE_SIZES)
234    String m_sizesAttribute;
235#endif
236    String m_charset;
237    String m_crossOriginMode;
238    bool m_linkIsStyleSheet;
239    String m_mediaAttribute;
240    bool m_inputIsImage;
241    float m_deviceScaleFactor;
242};
243
244TokenPreloadScanner::TokenPreloadScanner(const URL& documentURL, float deviceScaleFactor)
245    : m_documentURL(documentURL)
246    , m_inStyle(false)
247    , m_deviceScaleFactor(deviceScaleFactor)
248#if ENABLE(TEMPLATE_ELEMENT)
249    , m_templateCount(0)
250#endif
251{
252}
253
254TokenPreloadScanner::~TokenPreloadScanner()
255{
256}
257
258TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
259{
260    TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
261    m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle
262#if ENABLE(TEMPLATE_ELEMENT)
263                                    , m_templateCount
264#endif
265                                    ));
266    return checkpoint;
267}
268
269void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
270{
271    ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
272    const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
273    m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
274    m_inStyle = checkpoint.inStyle;
275#if ENABLE(TEMPLATE_ELEMENT)
276    m_templateCount = checkpoint.templateCount;
277#endif
278    m_cssScanner.reset();
279    m_checkpoints.clear();
280}
281
282void TokenPreloadScanner::scan(const HTMLToken& token, Vector<std::unique_ptr<PreloadRequest>>& requests
283#if ENABLE(PICTURE_SIZES)
284        , RenderView* view, Frame* frame
285#endif
286        )
287{
288    switch (token.type()) {
289    case HTMLToken::Character:
290        if (!m_inStyle)
291            return;
292        m_cssScanner.scan(token.data(), requests);
293        return;
294
295    case HTMLToken::EndTag: {
296        TagId tagId = tagIdFor(token.data());
297#if ENABLE(TEMPLATE_ELEMENT)
298        if (tagId == TagId::Template) {
299            if (m_templateCount)
300                --m_templateCount;
301            return;
302        }
303#endif
304        if (tagId == TagId::Style) {
305            if (m_inStyle)
306                m_cssScanner.reset();
307            m_inStyle = false;
308        }
309        return;
310    }
311
312    case HTMLToken::StartTag: {
313#if ENABLE(TEMPLATE_ELEMENT)
314        if (m_templateCount)
315            return;
316#endif
317        TagId tagId = tagIdFor(token.data());
318#if ENABLE(TEMPLATE_ELEMENT)
319        if (tagId == TagId::Template) {
320            ++m_templateCount;
321            return;
322        }
323#endif
324        if (tagId == TagId::Style) {
325            m_inStyle = true;
326            return;
327        }
328        if (tagId == TagId::Base) {
329            // The first <base> element is the one that wins.
330            if (!m_predictedBaseElementURL.isEmpty())
331                return;
332            updatePredictedBaseURL(token);
333            return;
334        }
335
336        StartTagScanner scanner(tagId, m_deviceScaleFactor);
337        scanner.processAttributes(token.attributes()
338#if ENABLE(PICTURE_SIZES)
339            , view, frame
340#endif
341            );
342        if (auto request = scanner.createPreloadRequest(m_predictedBaseElementURL))
343            requests.append(WTF::move(request));
344        return;
345    }
346
347    default:
348        return;
349    }
350}
351
352template<typename Token>
353void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
354{
355    ASSERT(m_predictedBaseElementURL.isEmpty());
356    if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
357        m_predictedBaseElementURL = URL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
358}
359
360HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const URL& documentURL, float deviceScaleFactor)
361    : m_scanner(documentURL, deviceScaleFactor)
362    , m_tokenizer(std::make_unique<HTMLTokenizer>(options))
363{
364}
365
366HTMLPreloadScanner::~HTMLPreloadScanner()
367{
368}
369
370void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
371{
372    m_source.append(source);
373}
374
375void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const URL& startingBaseElementURL
376#if ENABLE(PICTURE_SIZES)
377        , RenderView* view, Frame* frame
378#endif
379        )
380{
381    ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
382
383    // When we start scanning, our best prediction of the baseElementURL is the real one!
384    if (!startingBaseElementURL.isEmpty())
385        m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
386
387    PreloadRequestStream requests;
388
389    while (m_tokenizer->nextToken(m_source, m_token)) {
390        if (m_token.type() == HTMLToken::StartTag)
391            m_tokenizer->updateStateFor(AtomicString(m_token.name()));
392        m_scanner.scan(m_token, requests
393#if ENABLE(PICTURE_SIZES)
394            , view, frame
395#endif
396            );
397        m_token.clear();
398    }
399
400    preloader->preload(WTF::move(requests));
401}
402
403}
404