1/* 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#ifndef CharacterReferenceParserInlines_h 28#define CharacterReferenceParserInlines_h 29 30#include <wtf/text/StringBuilder.h> 31 32namespace WebCore { 33 34inline bool isHexDigit(UChar cc) 35{ 36 return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F'); 37} 38 39inline void unconsumeCharacters(SegmentedString& source, const StringBuilder& consumedCharacters) 40{ 41 if (consumedCharacters.length() == 1) 42 source.push(consumedCharacters[0]); 43 else if (consumedCharacters.length() == 2) { 44 source.push(consumedCharacters[0]); 45 source.push(consumedCharacters[1]); 46 } else 47 source.prepend(SegmentedString(consumedCharacters.toStringPreserveCapacity())); 48} 49 50template <typename ParserFunctions> 51bool consumeCharacterReference(SegmentedString& source, StringBuilder& decodedCharacter, bool& notEnoughCharacters, UChar additionalAllowedCharacter) 52{ 53 ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>'); 54 ASSERT(!notEnoughCharacters); 55 ASSERT(decodedCharacter.isEmpty()); 56 57 enum EntityState { 58 Initial, 59 Number, 60 MaybeHexLowerCaseX, 61 MaybeHexUpperCaseX, 62 Hex, 63 Decimal, 64 Named 65 }; 66 EntityState entityState = Initial; 67 UChar32 result = 0; 68 StringBuilder consumedCharacters; 69 70 while (!source.isEmpty()) { 71 UChar cc = source.currentChar(); 72 switch (entityState) { 73 case Initial: { 74 if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&') 75 return false; 76 if (additionalAllowedCharacter && cc == additionalAllowedCharacter) 77 return false; 78 if (cc == '#') { 79 entityState = Number; 80 break; 81 } 82 if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) { 83 entityState = Named; 84 continue; 85 } 86 return false; 87 } 88 case Number: { 89 if (cc == 'x') { 90 entityState = MaybeHexLowerCaseX; 91 break; 92 } 93 if (cc == 'X') { 94 entityState = MaybeHexUpperCaseX; 95 break; 96 } 97 if (cc >= '0' && cc <= '9') { 98 entityState = Decimal; 99 continue; 100 } 101 source.push('#'); 102 return false; 103 } 104 case MaybeHexLowerCaseX: { 105 if (isHexDigit(cc)) { 106 entityState = Hex; 107 continue; 108 } 109 source.push('#'); 110 source.push('x'); 111 return false; 112 } 113 case MaybeHexUpperCaseX: { 114 if (isHexDigit(cc)) { 115 entityState = Hex; 116 continue; 117 } 118 source.push('#'); 119 source.push('X'); 120 return false; 121 } 122 case Hex: { 123 if (cc >= '0' && cc <= '9') 124 result = result * 16 + cc - '0'; 125 else if (cc >= 'a' && cc <= 'f') 126 result = result * 16 + 10 + cc - 'a'; 127 else if (cc >= 'A' && cc <= 'F') 128 result = result * 16 + 10 + cc - 'A'; 129 else if (cc == ';') { 130 source.advanceAndASSERT(cc); 131 decodedCharacter.append(ParserFunctions::legalEntityFor(result)); 132 return true; 133 } else if (ParserFunctions::acceptMalformed()) { 134 decodedCharacter.append(ParserFunctions::legalEntityFor(result)); 135 return true; 136 } else { 137 unconsumeCharacters(source, consumedCharacters); 138 return false; 139 } 140 break; 141 } 142 case Decimal: { 143 if (cc >= '0' && cc <= '9') 144 result = result * 10 + cc - '0'; 145 else if (cc == ';') { 146 source.advanceAndASSERT(cc); 147 decodedCharacter.append(ParserFunctions::legalEntityFor(result)); 148 return true; 149 } else if (ParserFunctions::acceptMalformed()) { 150 decodedCharacter.append(ParserFunctions::legalEntityFor(result)); 151 return true; 152 } else { 153 unconsumeCharacters(source, consumedCharacters); 154 return false; 155 } 156 break; 157 } 158 case Named: { 159 return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, cc); 160 } 161 } 162 consumedCharacters.append(cc); 163 source.advanceAndASSERT(cc); 164 } 165 ASSERT(source.isEmpty()); 166 notEnoughCharacters = true; 167 unconsumeCharacters(source, consumedCharacters); 168 return false; 169} 170 171} 172 173#endif // CharacterReferenceParserInlines_h 174