1/*
2 * Copyright (C) 2013 Google, Inc. All Rights Reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef AtomicHTMLToken_h
27#define AtomicHTMLToken_h
28
29#include "Attribute.h"
30#include "HTMLToken.h"
31#include <wtf/RefCounted.h>
32#include <wtf/RefPtr.h>
33
34namespace WebCore {
35
36class AtomicHTMLToken {
37    WTF_MAKE_NONCOPYABLE(AtomicHTMLToken);
38public:
39
40    bool forceQuirks() const
41    {
42        ASSERT(m_type == HTMLToken::DOCTYPE);
43        return m_doctypeData->m_forceQuirks;
44    }
45
46    HTMLToken::Type type() const { return m_type; }
47
48    const AtomicString& name() const
49    {
50        ASSERT(usesName());
51        return m_name;
52    }
53
54    void setName(const AtomicString& name)
55    {
56        ASSERT(usesName());
57        m_name = name;
58    }
59
60    bool selfClosing() const
61    {
62        ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag);
63        return m_selfClosing;
64    }
65
66    Attribute* getAttributeItem(const QualifiedName& attributeName)
67    {
68        ASSERT(usesAttributes());
69        return findAttributeInVector(m_attributes, attributeName);
70    }
71
72    Vector<Attribute>& attributes()
73    {
74        ASSERT(usesAttributes());
75        return m_attributes;
76    }
77
78    const Vector<Attribute>& attributes() const
79    {
80        ASSERT(usesAttributes());
81        return m_attributes;
82    }
83
84    const UChar* characters() const
85    {
86        ASSERT(m_type == HTMLToken::Character);
87        return m_externalCharacters;
88    }
89
90    size_t charactersLength() const
91    {
92        ASSERT(m_type == HTMLToken::Character);
93        return m_externalCharactersLength;
94    }
95
96    bool isAll8BitData() const
97    {
98        return m_isAll8BitData;
99    }
100
101    const String& comment() const
102    {
103        ASSERT(m_type == HTMLToken::Comment);
104        return m_data;
105    }
106
107    // FIXME: Distinguish between a missing public identifer and an empty one.
108    Vector<UChar>& publicIdentifier() const
109    {
110        ASSERT(m_type == HTMLToken::DOCTYPE);
111        return m_doctypeData->m_publicIdentifier;
112    }
113
114    // FIXME: Distinguish between a missing system identifer and an empty one.
115    Vector<UChar>& systemIdentifier() const
116    {
117        ASSERT(m_type == HTMLToken::DOCTYPE);
118        return m_doctypeData->m_systemIdentifier;
119    }
120
121    explicit AtomicHTMLToken(HTMLToken& token)
122        : m_type(token.type())
123    {
124        switch (m_type) {
125        case HTMLToken::Uninitialized:
126            ASSERT_NOT_REACHED();
127            break;
128        case HTMLToken::DOCTYPE:
129            m_name = AtomicString(token.name());
130            m_doctypeData = token.releaseDoctypeData();
131            break;
132        case HTMLToken::EndOfFile:
133            break;
134        case HTMLToken::StartTag:
135        case HTMLToken::EndTag: {
136            m_selfClosing = token.selfClosing();
137            m_name = AtomicString(token.name());
138            initializeAttributes(token.attributes());
139            break;
140        }
141        case HTMLToken::Comment:
142            if (token.isAll8BitData())
143                m_data = String::make8BitFrom16BitSource(token.comment());
144            else
145                m_data = String(token.comment());
146            break;
147        case HTMLToken::Character:
148            m_externalCharacters = token.characters().data();
149            m_externalCharactersLength = token.characters().size();
150            m_isAll8BitData = token.isAll8BitData();
151            break;
152        }
153    }
154
155    explicit AtomicHTMLToken(HTMLToken::Type type)
156        : m_type(type)
157        , m_externalCharacters(0)
158        , m_externalCharactersLength(0)
159        , m_isAll8BitData(false)
160        , m_selfClosing(false)
161    {
162    }
163
164    AtomicHTMLToken(HTMLToken::Type type, const AtomicString& name, const Vector<Attribute>& attributes = Vector<Attribute>())
165        : m_type(type)
166        , m_name(name)
167        , m_externalCharacters(0)
168        , m_externalCharactersLength(0)
169        , m_isAll8BitData(false)
170        , m_selfClosing(false)
171        , m_attributes(attributes)
172    {
173        ASSERT(usesName());
174    }
175
176private:
177    HTMLToken::Type m_type;
178
179    void initializeAttributes(const HTMLToken::AttributeList& attributes);
180    QualifiedName nameForAttribute(const HTMLToken::Attribute&) const;
181
182    bool usesName() const;
183
184    bool usesAttributes() const;
185
186    // "name" for DOCTYPE, StartTag, and EndTag
187    AtomicString m_name;
188
189    // "data" for Comment
190    String m_data;
191
192    // "characters" for Character
193    //
194    // We don't want to copy the the characters out of the Token, so we
195    // keep a pointer to its buffer instead. This buffer is owned by the
196    // Token and causes a lifetime dependence between these objects.
197    //
198    // FIXME: Add a mechanism for "internalizing" the characters when the
199    //        HTMLToken is destructed.
200    const UChar* m_externalCharacters;
201    size_t m_externalCharactersLength;
202    bool m_isAll8BitData;
203
204    // For DOCTYPE
205    std::unique_ptr<DoctypeData> m_doctypeData;
206
207    // For StartTag and EndTag
208    bool m_selfClosing;
209
210    Vector<Attribute> m_attributes;
211};
212
213inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList& attributes)
214{
215    size_t size = attributes.size();
216    if (!size)
217        return;
218
219    m_attributes.clear();
220    m_attributes.reserveInitialCapacity(size);
221    for (size_t i = 0; i < size; ++i) {
222        const HTMLToken::Attribute& attribute = attributes[i];
223        if (attribute.name.isEmpty())
224            continue;
225
226        // FIXME: We should be able to add the following ASSERT once we fix
227        // https://bugs.webkit.org/show_bug.cgi?id=62971
228        //   ASSERT(attribute.nameRange.start);
229        ASSERT(attribute.nameRange.end);
230        ASSERT(attribute.valueRange.start);
231        ASSERT(attribute.valueRange.end);
232
233        AtomicString value(attribute.value);
234        const QualifiedName& name = nameForAttribute(attribute);
235        // FIXME: This is N^2 for the number of attributes.
236        if (!findAttributeInVector(m_attributes, name))
237            m_attributes.append(Attribute(name, value));
238    }
239}
240
241}
242
243#endif
244