1/* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#ifndef HTMLToken_h 27#define HTMLToken_h 28 29#include "Attribute.h" 30#include "HTMLToken.h" 31#include <wtf/RefCounted.h> 32#include <wtf/RefPtr.h> 33#include <wtf/text/StringView.h> 34 35namespace WebCore { 36 37class DoctypeData { 38 WTF_MAKE_NONCOPYABLE(DoctypeData); 39public: 40 DoctypeData() 41 : m_hasPublicIdentifier(false) 42 , m_hasSystemIdentifier(false) 43 , m_forceQuirks(false) 44 { 45 } 46 47 // FIXME: This should use String instead of Vector<UChar>. 48 bool m_hasPublicIdentifier; 49 bool m_hasSystemIdentifier; 50 WTF::Vector<UChar> m_publicIdentifier; 51 WTF::Vector<UChar> m_systemIdentifier; 52 bool m_forceQuirks; 53}; 54 55static inline Attribute* findAttributeInVector(Vector<Attribute>& attributes, const QualifiedName& name) 56{ 57 for (unsigned i = 0; i < attributes.size(); ++i) { 58 if (attributes.at(i).name().matches(name)) 59 return &attributes.at(i); 60 } 61 return 0; 62} 63 64class HTMLToken { 65 WTF_MAKE_NONCOPYABLE(HTMLToken); 66 WTF_MAKE_FAST_ALLOCATED; 67public: 68 enum Type { 69 Uninitialized, 70 DOCTYPE, 71 StartTag, 72 EndTag, 73 Comment, 74 Character, 75 EndOfFile, 76 }; 77 78 class Attribute { 79 public: 80 class Range { 81 public: 82 int start; 83 int end; 84 }; 85 86 Range nameRange; 87 Range valueRange; 88 Vector<UChar, 32> name; 89 Vector<UChar, 32> value; 90 }; 91 92 typedef Vector<Attribute, 10> AttributeList; 93 typedef Vector<UChar, 256> DataVector; 94 95 HTMLToken() { clear(); } 96 97 void clear() 98 { 99 m_type = Uninitialized; 100 m_range.start = 0; 101 m_range.end = 0; 102 m_baseOffset = 0; 103 m_data.clear(); 104 m_orAllData = 0; 105 } 106 107 bool isUninitialized() { return m_type == Uninitialized; } 108 Type type() const { return m_type; } 109 110 void makeEndOfFile() 111 { 112 ASSERT(m_type == Uninitialized); 113 m_type = EndOfFile; 114 } 115 116 /* Range and offset methods exposed for HTMLSourceTracker */ 117 int startIndex() const { return m_range.start; } 118 int endIndex() const { return m_range.end; } 119 120 void setBaseOffset(int offset) 121 { 122 m_baseOffset = offset; 123 } 124 125 void end(int endOffset) 126 { 127 m_range.end = endOffset - m_baseOffset; 128 } 129 130 const DataVector& data() const 131 { 132 ASSERT(m_type == Character || m_type == Comment || m_type == StartTag || m_type == EndTag); 133 return m_data; 134 } 135 136 bool isAll8BitData() const 137 { 138 return (m_orAllData <= 0xff); 139 } 140 141 const DataVector& name() const 142 { 143 ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); 144 return m_data; 145 } 146 147 void appendToName(UChar character) 148 { 149 ASSERT(m_type == StartTag || m_type == EndTag || m_type == DOCTYPE); 150 ASSERT(character); 151 m_data.append(character); 152 m_orAllData |= character; 153 } 154 155 /* DOCTYPE Tokens */ 156 157 bool forceQuirks() const 158 { 159 ASSERT(m_type == DOCTYPE); 160 return m_doctypeData->m_forceQuirks; 161 } 162 163 void setForceQuirks() 164 { 165 ASSERT(m_type == DOCTYPE); 166 m_doctypeData->m_forceQuirks = true; 167 } 168 169 void beginDOCTYPE() 170 { 171 ASSERT(m_type == Uninitialized); 172 m_type = DOCTYPE; 173 m_doctypeData = std::make_unique<DoctypeData>(); 174 } 175 176 void beginDOCTYPE(UChar character) 177 { 178 ASSERT(character); 179 beginDOCTYPE(); 180 m_data.append(character); 181 m_orAllData |= character; 182 } 183 184 // FIXME: Distinguish between a missing public identifer and an empty one. 185 const WTF::Vector<UChar>& publicIdentifier() const 186 { 187 ASSERT(m_type == DOCTYPE); 188 return m_doctypeData->m_publicIdentifier; 189 } 190 191 // FIXME: Distinguish between a missing system identifer and an empty one. 192 const WTF::Vector<UChar>& systemIdentifier() const 193 { 194 ASSERT(m_type == DOCTYPE); 195 return m_doctypeData->m_systemIdentifier; 196 } 197 198 void setPublicIdentifierToEmptyString() 199 { 200 ASSERT(m_type == DOCTYPE); 201 m_doctypeData->m_hasPublicIdentifier = true; 202 m_doctypeData->m_publicIdentifier.clear(); 203 } 204 205 void setSystemIdentifierToEmptyString() 206 { 207 ASSERT(m_type == DOCTYPE); 208 m_doctypeData->m_hasSystemIdentifier = true; 209 m_doctypeData->m_systemIdentifier.clear(); 210 } 211 212 void appendToPublicIdentifier(UChar character) 213 { 214 ASSERT(character); 215 ASSERT(m_type == DOCTYPE); 216 ASSERT(m_doctypeData->m_hasPublicIdentifier); 217 m_doctypeData->m_publicIdentifier.append(character); 218 } 219 220 void appendToSystemIdentifier(UChar character) 221 { 222 ASSERT(character); 223 ASSERT(m_type == DOCTYPE); 224 ASSERT(m_doctypeData->m_hasSystemIdentifier); 225 m_doctypeData->m_systemIdentifier.append(character); 226 } 227 228 std::unique_ptr<DoctypeData> releaseDoctypeData() 229 { 230 return WTF::move(m_doctypeData); 231 } 232 233 /* Start/End Tag Tokens */ 234 235 bool selfClosing() const 236 { 237 ASSERT(m_type == StartTag || m_type == EndTag); 238 return m_selfClosing; 239 } 240 241 void setSelfClosing() 242 { 243 ASSERT(m_type == StartTag || m_type == EndTag); 244 m_selfClosing = true; 245 } 246 247 void beginStartTag(UChar character) 248 { 249 ASSERT(character); 250 ASSERT(m_type == Uninitialized); 251 m_type = StartTag; 252 m_selfClosing = false; 253 m_currentAttribute = 0; 254 m_attributes.clear(); 255 256 m_data.append(character); 257 m_orAllData |= character; 258 } 259 260 void beginEndTag(LChar character) 261 { 262 ASSERT(m_type == Uninitialized); 263 m_type = EndTag; 264 m_selfClosing = false; 265 m_currentAttribute = 0; 266 m_attributes.clear(); 267 268 m_data.append(character); 269 } 270 271 void beginEndTag(const Vector<LChar, 32>& characters) 272 { 273 ASSERT(m_type == Uninitialized); 274 m_type = EndTag; 275 m_selfClosing = false; 276 m_currentAttribute = 0; 277 m_attributes.clear(); 278 279 m_data.appendVector(characters); 280 } 281 282 void addNewAttribute() 283 { 284 ASSERT(m_type == StartTag || m_type == EndTag); 285 m_attributes.grow(m_attributes.size() + 1); 286 m_currentAttribute = &m_attributes.last(); 287#ifndef NDEBUG 288 m_currentAttribute->nameRange.start = 0; 289 m_currentAttribute->nameRange.end = 0; 290 m_currentAttribute->valueRange.start = 0; 291 m_currentAttribute->valueRange.end = 0; 292#endif 293 } 294 295 void beginAttributeName(int offset) 296 { 297 m_currentAttribute->nameRange.start = offset - m_baseOffset; 298 } 299 300 void endAttributeName(int offset) 301 { 302 int index = offset - m_baseOffset; 303 m_currentAttribute->nameRange.end = index; 304 m_currentAttribute->valueRange.start = index; 305 m_currentAttribute->valueRange.end = index; 306 } 307 308 void beginAttributeValue(int offset) 309 { 310 m_currentAttribute->valueRange.start = offset - m_baseOffset; 311#ifndef NDEBUG 312 m_currentAttribute->valueRange.end = 0; 313#endif 314 } 315 316 void endAttributeValue(int offset) 317 { 318 m_currentAttribute->valueRange.end = offset - m_baseOffset; 319 } 320 321 void appendToAttributeName(UChar character) 322 { 323 ASSERT(character); 324 ASSERT(m_type == StartTag || m_type == EndTag); 325 // FIXME: We should be able to add the following ASSERT once we fix 326 // https://bugs.webkit.org/show_bug.cgi?id=62971 327 // ASSERT(m_currentAttribute->nameRange.start); 328 m_currentAttribute->name.append(character); 329 } 330 331 void appendToAttributeValue(UChar character) 332 { 333 ASSERT(character); 334 ASSERT(m_type == StartTag || m_type == EndTag); 335 ASSERT(m_currentAttribute->valueRange.start); 336 m_currentAttribute->value.append(character); 337 } 338 339 void appendToAttributeValue(size_t i, StringView value) 340 { 341 ASSERT(!value.isEmpty()); 342 ASSERT(m_type == StartTag || m_type == EndTag); 343 append(m_attributes[i].value, value); 344 } 345 346 const AttributeList& attributes() const 347 { 348 ASSERT(m_type == StartTag || m_type == EndTag); 349 return m_attributes; 350 } 351 352 const Attribute* getAttributeItem(const QualifiedName& name) const 353 { 354 for (unsigned i = 0; i < m_attributes.size(); ++i) { 355 if (AtomicString(m_attributes.at(i).name) == name.localName()) 356 return &m_attributes.at(i); 357 } 358 return 0; 359 } 360 361 // Used by the XSSAuditor to nuke XSS-laden attributes. 362 void eraseValueOfAttribute(size_t i) 363 { 364 ASSERT(m_type == StartTag || m_type == EndTag); 365 m_attributes[i].value.clear(); 366 } 367 368 /* Character Tokens */ 369 370 // Starting a character token works slightly differently than starting 371 // other types of tokens because we want to save a per-character branch. 372 void ensureIsCharacterToken() 373 { 374 ASSERT(m_type == Uninitialized || m_type == Character); 375 m_type = Character; 376 } 377 378 const DataVector& characters() const 379 { 380 ASSERT(m_type == Character); 381 return m_data; 382 } 383 384 void appendToCharacter(char character) 385 { 386 ASSERT(m_type == Character); 387 m_data.append(character); 388 } 389 390 void appendToCharacter(UChar character) 391 { 392 ASSERT(m_type == Character); 393 m_data.append(character); 394 m_orAllData |= character; 395 } 396 397 void appendToCharacter(const Vector<LChar, 32>& characters) 398 { 399 ASSERT(m_type == Character); 400 m_data.appendVector(characters); 401 } 402 403 /* Comment Tokens */ 404 405 const DataVector& comment() const 406 { 407 ASSERT(m_type == Comment); 408 return m_data; 409 } 410 411 void beginComment() 412 { 413 ASSERT(m_type == Uninitialized); 414 m_type = Comment; 415 } 416 417 void appendToComment(UChar character) 418 { 419 ASSERT(character); 420 ASSERT(m_type == Comment); 421 m_data.append(character); 422 m_orAllData |= character; 423 } 424 425 void eraseCharacters() 426 { 427 ASSERT(m_type == Character); 428 m_data.clear(); 429 m_orAllData = 0; 430 } 431 432private: 433 Type m_type; 434 Attribute::Range m_range; // Always starts at zero. 435 int m_baseOffset; 436 DataVector m_data; 437 UChar m_orAllData; 438 439 // For StartTag and EndTag 440 bool m_selfClosing; 441 AttributeList m_attributes; 442 443 // A pointer into m_attributes used during lexing. 444 Attribute* m_currentAttribute; 445 446 // For DOCTYPE 447 std::unique_ptr<DoctypeData> m_doctypeData; 448}; 449 450} 451 452#endif 453