1/*
2    Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de)
3    Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com)
4    Copyright (C) 2006, 2008 Apple Inc. All rights reserved.
5
6    This library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public
8    License as published by the Free Software Foundation; either
9    version 2 of the License, or (at your option) any later version.
10
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public License
17    along with this library; see the file COPYING.LIB.  If not, write to
18    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19    Boston, MA 02110-1301, USA.
20
21*/
22
23#ifndef TextResourceDecoder_h
24#define TextResourceDecoder_h
25
26#include "TextEncoding.h"
27#include <wtf/RefCounted.h>
28
29namespace WebCore {
30
31class HTMLMetaCharsetParser;
32
33class TextResourceDecoder : public RefCounted<TextResourceDecoder> {
34public:
35    enum EncodingSource {
36        DefaultEncoding,
37        AutoDetectedEncoding,
38        EncodingFromXMLHeader,
39        EncodingFromMetaTag,
40        EncodingFromCSSCharset,
41        EncodingFromHTTPHeader,
42        UserChosenEncoding,
43        EncodingFromParentFrame
44    };
45
46    static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding(), bool usesEncodingDetector = false)
47    {
48        return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector));
49    }
50    ~TextResourceDecoder();
51
52    void setEncoding(const TextEncoding&, EncodingSource);
53    const TextEncoding& encoding() const { return m_encoding; }
54
55    String decode(const char* data, size_t length);
56    String flush();
57
58    String decodeAndFlush(const char* data, size_t length);
59
60    void setHintEncoding(const TextResourceDecoder* hintDecoder)
61    {
62        // hintEncoding is for use with autodetection, which should be
63        // only invoked when hintEncoding comes from auto-detection.
64        if (hintDecoder && hintDecoder->m_source == AutoDetectedEncoding)
65            m_hintEncoding = hintDecoder->encoding().name();
66    }
67
68    void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; }
69    bool sawError() const { return m_sawError; }
70
71private:
72    TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector);
73
74    enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM.
75    static ContentType determineContentType(const String& mimeType);
76    static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding);
77
78    size_t checkForBOM(const char*, size_t);
79    bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer);
80    bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer);
81    bool checkForMetaCharset(const char*, size_t);
82    void detectJapaneseEncoding(const char*, size_t);
83    bool shouldAutoDetect() const;
84
85    ContentType m_contentType;
86    TextEncoding m_encoding;
87    OwnPtr<TextCodec> m_codec;
88    EncodingSource m_source;
89    const char* m_hintEncoding;
90    Vector<char> m_buffer;
91    bool m_checkedForBOM;
92    bool m_checkedForCSSCharset;
93    bool m_checkedForHeadCharset;
94    bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors.
95    bool m_sawError;
96    bool m_usesEncodingDetector;
97
98    std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser;
99};
100
101}
102
103#endif
104