1/* 2 * Copyright (C) 2011, 2013 Google Inc. All rights reserved. 3 * Copyright (C) 2014 Apple Inc. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: 8 * 9 * * Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * * Redistributions in binary form must reproduce the above 12 * copyright notice, this list of conditions and the following disclaimer 13 * in the documentation and/or other materials provided with the 14 * distribution. 15 * * Neither the name of Google Inc. nor the names of its 16 * contributors may be used to endorse or promote products derived from 17 * this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32#include "config.h" 33 34#if ENABLE(VIDEO_TRACK) 35 36#include "WebVTTTokenizer.h" 37 38#include "MarkupTokenizerInlines.h" 39#include <wtf/text/StringBuilder.h> 40#include <wtf/unicode/CharacterNames.h> 41 42namespace WebCore { 43 44#define WEBVTT_BEGIN_STATE(stateName) case stateName: stateName: 45#define WEBVTT_ADVANCE_TO(stateName) \ 46 do { \ 47 state = stateName; \ 48 ASSERT(!m_input.isEmpty()); \ 49 m_inputStreamPreprocessor.advance(m_input); \ 50 cc = m_inputStreamPreprocessor.nextInputCharacter(); \ 51 goto stateName; \ 52 } while (false) 53 54 55template<unsigned charactersCount> 56ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)[charactersCount]) 57{ 58 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersCount - 1); 59} 60 61static void addNewClass(StringBuilder& classes, const StringBuilder& newClass) 62{ 63 if (!classes.isEmpty()) 64 classes.append(' '); 65 classes.append(newClass); 66} 67 68inline bool emitToken(WebVTTToken& resultToken, const WebVTTToken& token) 69{ 70 resultToken = token; 71 return true; 72} 73 74inline bool advanceAndEmitToken(SegmentedString& source, WebVTTToken& resultToken, const WebVTTToken& token) 75{ 76 source.advanceAndUpdateLineNumber(); 77 return emitToken(resultToken, token); 78} 79 80WebVTTTokenizer::WebVTTTokenizer(const String& input) 81 : m_input(input) 82 , m_inputStreamPreprocessor(this) 83{ 84 // Append an EOF marker and close the input "stream". 85 ASSERT(!m_input.isClosed()); 86 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1))); 87 m_input.close(); 88} 89 90bool WebVTTTokenizer::nextToken(WebVTTToken& token) 91{ 92 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input)) 93 return false; 94 95 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); 96 if (cc == kEndOfFileMarker) { 97 m_inputStreamPreprocessor.advance(m_input); 98 return false; 99 } 100 101 StringBuilder buffer; 102 StringBuilder result; 103 StringBuilder classes; 104 105 enum { 106 DataState, 107 EscapeState, 108 TagState, 109 StartTagState, 110 StartTagClassState, 111 StartTagAnnotationState, 112 EndTagState, 113 TimestampTagState, 114 } state = DataState; 115 116 // 4.8.10.13.4 WebVTT cue text tokenizer 117 switch (state) { 118 WEBVTT_BEGIN_STATE(DataState) { 119 if (cc == '&') { 120 buffer.append(static_cast<LChar>(cc)); 121 WEBVTT_ADVANCE_TO(EscapeState); 122 } else if (cc == '<') { 123 if (result.isEmpty()) 124 WEBVTT_ADVANCE_TO(TagState); 125 else { 126 // We don't want to advance input or perform a state transition - just return a (new) token. 127 // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.) 128 return emitToken(token, WebVTTToken::StringToken(result.toString())); 129 } 130 } else if (cc == kEndOfFileMarker) 131 return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString())); 132 else { 133 result.append(cc); 134 WEBVTT_ADVANCE_TO(DataState); 135 } 136 } 137 END_STATE() 138 139 WEBVTT_BEGIN_STATE(EscapeState) { 140 if (cc == ';') { 141 if (equalLiteral(buffer, "&")) 142 result.append('&'); 143 else if (equalLiteral(buffer, "<")) 144 result.append('<'); 145 else if (equalLiteral(buffer, ">")) 146 result.append('>'); 147 else if (equalLiteral(buffer, "&lrm")) 148 result.append(leftToRightMark); 149 else if (equalLiteral(buffer, "&rlm")) 150 result.append(rightToLeftMark); 151 else if (equalLiteral(buffer, " ")) 152 result.append(noBreakSpace); 153 else { 154 buffer.append(static_cast<LChar>(cc)); 155 result.append(buffer); 156 } 157 buffer.clear(); 158 WEBVTT_ADVANCE_TO(DataState); 159 } else if (isASCIIAlphanumeric(cc)) { 160 buffer.append(static_cast<LChar>(cc)); 161 WEBVTT_ADVANCE_TO(EscapeState); 162 } else if (cc == '<') { 163 result.append(buffer); 164 return emitToken(token, WebVTTToken::StringToken(result.toString())); 165 } else if (cc == kEndOfFileMarker) { 166 result.append(buffer); 167 return advanceAndEmitToken(m_input, token, WebVTTToken::StringToken(result.toString())); 168 } else { 169 result.append(buffer); 170 buffer.clear(); 171 172 if (cc == '&') { 173 buffer.append(static_cast<LChar>(cc)); 174 WEBVTT_ADVANCE_TO(EscapeState); 175 } 176 result.append(cc); 177 WEBVTT_ADVANCE_TO(DataState); 178 } 179 } 180 END_STATE() 181 182 WEBVTT_BEGIN_STATE(TagState) { 183 if (isTokenizerWhitespace(cc)) { 184 ASSERT(result.isEmpty()); 185 WEBVTT_ADVANCE_TO(StartTagAnnotationState); 186 } else if (cc == '.') { 187 ASSERT(result.isEmpty()); 188 WEBVTT_ADVANCE_TO(StartTagClassState); 189 } else if (cc == '/') { 190 WEBVTT_ADVANCE_TO(EndTagState); 191 } else if (WTF::isASCIIDigit(cc)) { 192 result.append(cc); 193 WEBVTT_ADVANCE_TO(TimestampTagState); 194 } else if (cc == '>' || cc == kEndOfFileMarker) { 195 ASSERT(result.isEmpty()); 196 return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString())); 197 } else { 198 result.append(cc); 199 WEBVTT_ADVANCE_TO(StartTagState); 200 } 201 } 202 END_STATE() 203 204 WEBVTT_BEGIN_STATE(StartTagState) { 205 if (isTokenizerWhitespace(cc)) 206 WEBVTT_ADVANCE_TO(StartTagAnnotationState); 207 else if (cc == '.') 208 WEBVTT_ADVANCE_TO(StartTagClassState); 209 else if (cc == '>' || cc == kEndOfFileMarker) 210 return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString())); 211 else { 212 result.append(cc); 213 WEBVTT_ADVANCE_TO(StartTagState); 214 } 215 } 216 END_STATE() 217 218 WEBVTT_BEGIN_STATE(StartTagClassState) { 219 if (isTokenizerWhitespace(cc)) { 220 addNewClass(classes, buffer); 221 buffer.clear(); 222 WEBVTT_ADVANCE_TO(StartTagAnnotationState); 223 } else if (cc == '.') { 224 addNewClass(classes, buffer); 225 buffer.clear(); 226 WEBVTT_ADVANCE_TO(StartTagClassState); 227 } else if (cc == '>' || cc == kEndOfFileMarker) { 228 addNewClass(classes, buffer); 229 buffer.clear(); 230 return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString())); 231 } else { 232 buffer.append(cc); 233 WEBVTT_ADVANCE_TO(StartTagClassState); 234 } 235 236 } 237 END_STATE() 238 239 WEBVTT_BEGIN_STATE(StartTagAnnotationState) { 240 if (cc == '>' || cc == kEndOfFileMarker) { 241 return advanceAndEmitToken(m_input, token, WebVTTToken::StartTag(result.toString(), classes.toAtomicString(), buffer.toAtomicString())); 242 } 243 buffer.append(cc); 244 WEBVTT_ADVANCE_TO(StartTagAnnotationState); 245 } 246 END_STATE() 247 248 WEBVTT_BEGIN_STATE(EndTagState) { 249 if (cc == '>' || cc == kEndOfFileMarker) 250 return advanceAndEmitToken(m_input, token, WebVTTToken::EndTag(result.toString())); 251 result.append(cc); 252 WEBVTT_ADVANCE_TO(EndTagState); 253 } 254 END_STATE() 255 256 WEBVTT_BEGIN_STATE(TimestampTagState) { 257 if (cc == '>' || cc == kEndOfFileMarker) 258 return advanceAndEmitToken(m_input, token, WebVTTToken::TimestampTag(result.toString())); 259 result.append(cc); 260 WEBVTT_ADVANCE_TO(TimestampTagState); 261 } 262 END_STATE() 263 264 } 265 266 ASSERT_NOT_REACHED(); 267 return false; 268} 269 270} 271 272#endif 273