1/* 2 Copyright (C) 1999 Lars Knoll (knoll@mpi-hd.mpg.de) 3 Copyright (C) 2006 Alexey Proskuryakov (ap@nypop.com) 4 Copyright (C) 2006, 2008 Apple Inc. All rights reserved. 5 6 This library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Library General Public 8 License as published by the Free Software Foundation; either 9 version 2 of the License, or (at your option) any later version. 10 11 This library is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Library General Public License for more details. 15 16 You should have received a copy of the GNU Library General Public License 17 along with this library; see the file COPYING.LIB. If not, write to 18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 Boston, MA 02110-1301, USA. 20 21*/ 22 23#ifndef TextResourceDecoder_h 24#define TextResourceDecoder_h 25 26#include "TextEncoding.h" 27#include <wtf/RefCounted.h> 28 29namespace WebCore { 30 31class HTMLMetaCharsetParser; 32 33class TextResourceDecoder : public RefCounted<TextResourceDecoder> { 34public: 35 enum EncodingSource { 36 DefaultEncoding, 37 AutoDetectedEncoding, 38 EncodingFromXMLHeader, 39 EncodingFromMetaTag, 40 EncodingFromCSSCharset, 41 EncodingFromHTTPHeader, 42 UserChosenEncoding, 43 EncodingFromParentFrame 44 }; 45 46 static PassRefPtr<TextResourceDecoder> create(const String& mimeType, const TextEncoding& defaultEncoding = TextEncoding(), bool usesEncodingDetector = false) 47 { 48 return adoptRef(new TextResourceDecoder(mimeType, defaultEncoding, usesEncodingDetector)); 49 } 50 ~TextResourceDecoder(); 51 52 void setEncoding(const TextEncoding&, EncodingSource); 53 const TextEncoding& encoding() const { return m_encoding; } 54 55 String decode(const char* data, size_t length); 56 String flush(); 57 58 String decodeAndFlush(const char* data, size_t length); 59 60 void setHintEncoding(const TextResourceDecoder* hintDecoder) 61 { 62 // hintEncoding is for use with autodetection, which should be 63 // only invoked when hintEncoding comes from auto-detection. 64 if (hintDecoder && hintDecoder->m_source == AutoDetectedEncoding) 65 m_hintEncoding = hintDecoder->encoding().name(); 66 } 67 68 void useLenientXMLDecoding() { m_useLenientXMLDecoding = true; } 69 bool sawError() const { return m_sawError; } 70 71private: 72 TextResourceDecoder(const String& mimeType, const TextEncoding& defaultEncoding, bool usesEncodingDetector); 73 74 enum ContentType { PlainText, HTML, XML, CSS }; // PlainText only checks for BOM. 75 static ContentType determineContentType(const String& mimeType); 76 static const TextEncoding& defaultEncoding(ContentType, const TextEncoding& defaultEncoding); 77 78 size_t checkForBOM(const char*, size_t); 79 bool checkForCSSCharset(const char*, size_t, bool& movedDataToBuffer); 80 bool checkForHeadCharset(const char*, size_t, bool& movedDataToBuffer); 81 bool checkForMetaCharset(const char*, size_t); 82 void detectJapaneseEncoding(const char*, size_t); 83 bool shouldAutoDetect() const; 84 85 ContentType m_contentType; 86 TextEncoding m_encoding; 87 OwnPtr<TextCodec> m_codec; 88 EncodingSource m_source; 89 const char* m_hintEncoding; 90 Vector<char> m_buffer; 91 bool m_checkedForBOM; 92 bool m_checkedForCSSCharset; 93 bool m_checkedForHeadCharset; 94 bool m_useLenientXMLDecoding; // Don't stop on XML decoding errors. 95 bool m_sawError; 96 bool m_usesEncodingDetector; 97 98 std::unique_ptr<HTMLMetaCharsetParser> m_charsetParser; 99}; 100 101} 102 103#endif 104