1/*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#ifndef CharacterReferenceParserInlines_h
28#define CharacterReferenceParserInlines_h
29
30#include <wtf/text/StringBuilder.h>
31
32namespace WebCore {
33
34inline bool isHexDigit(UChar cc)
35{
36    return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F');
37}
38
39inline void unconsumeCharacters(SegmentedString& source, const StringBuilder& consumedCharacters)
40{
41    if (consumedCharacters.length() == 1)
42        source.push(consumedCharacters[0]);
43    else if (consumedCharacters.length() == 2) {
44        source.push(consumedCharacters[0]);
45        source.push(consumedCharacters[1]);
46    } else
47        source.prepend(SegmentedString(consumedCharacters.toStringPreserveCapacity()));
48}
49
50template <typename ParserFunctions>
51bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCharacter, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
52{
53    ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
54    ASSERT(!notEnoughCharacters);
55    ASSERT(decodedCharacter.isEmpty());
56
57    enum EntityState {
58        Initial,
59        Number,
60        MaybeHexLowerCaseX,
61        MaybeHexUpperCaseX,
62        Hex,
63        Decimal,
64        Named
65    };
66    EntityState entityState = Initial;
67    UChar32 result = 0;
68    StringBuilder consumedCharacters;
69
70    while (!source.isEmpty()) {
71        UChar cc = source.currentChar();
72        switch (entityState) {
73        case Initial: {
74            if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
75                return false;
76            if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
77                return false;
78            if (cc == '#') {
79                entityState = Number;
80                break;
81            }
82            if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
83                entityState = Named;
84                continue;
85            }
86            return false;
87        }
88        case Number: {
89            if (cc == 'x') {
90                entityState = MaybeHexLowerCaseX;
91                break;
92            }
93            if (cc == 'X') {
94                entityState = MaybeHexUpperCaseX;
95                break;
96            }
97            if (cc >= '0' && cc <= '9') {
98                entityState = Decimal;
99                continue;
100            }
101            source.push('#');
102            return false;
103        }
104        case MaybeHexLowerCaseX: {
105            if (isHexDigit(cc)) {
106                entityState = Hex;
107                continue;
108            }
109            source.push('#');
110            source.push('x');
111            return false;
112        }
113        case MaybeHexUpperCaseX: {
114            if (isHexDigit(cc)) {
115                entityState = Hex;
116                continue;
117            }
118            source.push('#');
119            source.push('X');
120            return false;
121        }
122        case Hex: {
123            if (cc >= '0' && cc <= '9')
124                result = result * 16 + cc - '0';
125            else if (cc >= 'a' && cc <= 'f')
126                result = result * 16 + 10 + cc - 'a';
127            else if (cc >= 'A' && cc <= 'F')
128                result = result * 16 + 10 + cc - 'A';
129            else if (cc == ';') {
130                source.advanceAndASSERT(cc);
131                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
132                return true;
133            } else if (ParserFunctions::acceptMalformed()) {
134                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
135                return true;
136            } else {
137                unconsumeCharacters(source, consumedCharacters);
138                return false;
139            }
140            break;
141        }
142        case Decimal: {
143            if (cc >= '0' && cc <= '9')
144                result = result * 10 + cc - '0';
145            else if (cc == ';') {
146                source.advanceAndASSERT(cc);
147                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
148                return true;
149            } else if (ParserFunctions::acceptMalformed()) {
150                decodedCharacter.append(ParserFunctions::legalEntityFor(result));
151                return true;
152            } else {
153                unconsumeCharacters(source, consumedCharacters);
154                return false;
155            }
156            break;
157        }
158        case Named: {
159            return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, cc);
160        }
161        }
162        consumedCharacters.append(cc);
163        source.advanceAndASSERT(cc);
164    }
165    ASSERT(source.isEmpty());
166    notEnoughCharacters = true;
167    unconsumeCharacters(source, consumedCharacters);
168    return false;
169}
170
171}
172
173#endif // CharacterReferenceParserInlines_h
174