1/* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#ifndef AtomicHTMLToken_h 27#define AtomicHTMLToken_h 28 29#include "Attribute.h" 30#include "HTMLToken.h" 31#include <wtf/RefCounted.h> 32#include <wtf/RefPtr.h> 33 34namespace WebCore { 35 36class AtomicHTMLToken { 37 WTF_MAKE_NONCOPYABLE(AtomicHTMLToken); 38public: 39 40 bool forceQuirks() const 41 { 42 ASSERT(m_type == HTMLToken::DOCTYPE); 43 return m_doctypeData->m_forceQuirks; 44 } 45 46 HTMLToken::Type type() const { return m_type; } 47 48 const AtomicString& name() const 49 { 50 ASSERT(usesName()); 51 return m_name; 52 } 53 54 void setName(const AtomicString& name) 55 { 56 ASSERT(usesName()); 57 m_name = name; 58 } 59 60 bool selfClosing() const 61 { 62 ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag); 63 return m_selfClosing; 64 } 65 66 Attribute* getAttributeItem(const QualifiedName& attributeName) 67 { 68 ASSERT(usesAttributes()); 69 return findAttributeInVector(m_attributes, attributeName); 70 } 71 72 Vector<Attribute>& attributes() 73 { 74 ASSERT(usesAttributes()); 75 return m_attributes; 76 } 77 78 const Vector<Attribute>& attributes() const 79 { 80 ASSERT(usesAttributes()); 81 return m_attributes; 82 } 83 84 const UChar* characters() const 85 { 86 ASSERT(m_type == HTMLToken::Character); 87 return m_externalCharacters; 88 } 89 90 size_t charactersLength() const 91 { 92 ASSERT(m_type == HTMLToken::Character); 93 return m_externalCharactersLength; 94 } 95 96 bool isAll8BitData() const 97 { 98 return m_isAll8BitData; 99 } 100 101 const String& comment() const 102 { 103 ASSERT(m_type == HTMLToken::Comment); 104 return m_data; 105 } 106 107 // FIXME: Distinguish between a missing public identifer and an empty one. 108 Vector<UChar>& publicIdentifier() const 109 { 110 ASSERT(m_type == HTMLToken::DOCTYPE); 111 return m_doctypeData->m_publicIdentifier; 112 } 113 114 // FIXME: Distinguish between a missing system identifer and an empty one. 115 Vector<UChar>& systemIdentifier() const 116 { 117 ASSERT(m_type == HTMLToken::DOCTYPE); 118 return m_doctypeData->m_systemIdentifier; 119 } 120 121 explicit AtomicHTMLToken(HTMLToken& token) 122 : m_type(token.type()) 123 { 124 switch (m_type) { 125 case HTMLToken::Uninitialized: 126 ASSERT_NOT_REACHED(); 127 break; 128 case HTMLToken::DOCTYPE: 129 m_name = AtomicString(token.name()); 130 m_doctypeData = token.releaseDoctypeData(); 131 break; 132 case HTMLToken::EndOfFile: 133 break; 134 case HTMLToken::StartTag: 135 case HTMLToken::EndTag: { 136 m_selfClosing = token.selfClosing(); 137 m_name = AtomicString(token.name()); 138 initializeAttributes(token.attributes()); 139 break; 140 } 141 case HTMLToken::Comment: 142 if (token.isAll8BitData()) 143 m_data = String::make8BitFrom16BitSource(token.comment()); 144 else 145 m_data = String(token.comment()); 146 break; 147 case HTMLToken::Character: 148 m_externalCharacters = token.characters().data(); 149 m_externalCharactersLength = token.characters().size(); 150 m_isAll8BitData = token.isAll8BitData(); 151 break; 152 } 153 } 154 155 explicit AtomicHTMLToken(HTMLToken::Type type) 156 : m_type(type) 157 , m_externalCharacters(0) 158 , m_externalCharactersLength(0) 159 , m_isAll8BitData(false) 160 , m_selfClosing(false) 161 { 162 } 163 164 AtomicHTMLToken(HTMLToken::Type type, const AtomicString& name, const Vector<Attribute>& attributes = Vector<Attribute>()) 165 : m_type(type) 166 , m_name(name) 167 , m_externalCharacters(0) 168 , m_externalCharactersLength(0) 169 , m_isAll8BitData(false) 170 , m_selfClosing(false) 171 , m_attributes(attributes) 172 { 173 ASSERT(usesName()); 174 } 175 176private: 177 HTMLToken::Type m_type; 178 179 void initializeAttributes(const HTMLToken::AttributeList& attributes); 180 QualifiedName nameForAttribute(const HTMLToken::Attribute&) const; 181 182 bool usesName() const; 183 184 bool usesAttributes() const; 185 186 // "name" for DOCTYPE, StartTag, and EndTag 187 AtomicString m_name; 188 189 // "data" for Comment 190 String m_data; 191 192 // "characters" for Character 193 // 194 // We don't want to copy the the characters out of the Token, so we 195 // keep a pointer to its buffer instead. This buffer is owned by the 196 // Token and causes a lifetime dependence between these objects. 197 // 198 // FIXME: Add a mechanism for "internalizing" the characters when the 199 // HTMLToken is destructed. 200 const UChar* m_externalCharacters; 201 size_t m_externalCharactersLength; 202 bool m_isAll8BitData; 203 204 // For DOCTYPE 205 std::unique_ptr<DoctypeData> m_doctypeData; 206 207 // For StartTag and EndTag 208 bool m_selfClosing; 209 210 Vector<Attribute> m_attributes; 211}; 212 213inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList& attributes) 214{ 215 size_t size = attributes.size(); 216 if (!size) 217 return; 218 219 m_attributes.clear(); 220 m_attributes.reserveInitialCapacity(size); 221 for (size_t i = 0; i < size; ++i) { 222 const HTMLToken::Attribute& attribute = attributes[i]; 223 if (attribute.name.isEmpty()) 224 continue; 225 226 // FIXME: We should be able to add the following ASSERT once we fix 227 // https://bugs.webkit.org/show_bug.cgi?id=62971 228 // ASSERT(attribute.nameRange.start); 229 ASSERT(attribute.nameRange.end); 230 ASSERT(attribute.valueRange.start); 231 ASSERT(attribute.valueRange.end); 232 233 AtomicString value(attribute.value); 234 const QualifiedName& name = nameForAttribute(attribute); 235 // FIXME: This is N^2 for the number of attributes. 236 if (!findAttributeInVector(m_attributes, name)) 237 m_attributes.append(Attribute(name, value)); 238 } 239} 240 241} 242 243#endif 244