/*
 * Copyright (C) 2004, 2006, 2009, 2014 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef TextIterator_h
#define TextIterator_h

// FIXME: Move each iterator class into a separate header file.
30 31#include "FindOptions.h" 32#include "Range.h" 33#include "TextIteratorBehavior.h" 34#include <wtf/Vector.h> 35#include <wtf/text/StringView.h> 36 37namespace WebCore { 38 39class InlineTextBox; 40class RenderText; 41class RenderTextFragment; 42 43String plainText(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); 44String plainTextReplacingNoBreakSpace(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); 45PassRefPtr<Range> findPlainText(const Range&, const String&, FindOptions); 46 47// FIXME: Move this somewhere else in the editing directory. It doesn't belong here. 48bool isRendererReplacedElement(RenderObject*); 49 50class BitStack { 51public: 52 BitStack(); 53 ~BitStack(); 54 55 void push(bool); 56 void pop(); 57 58 bool top() const; 59 unsigned size() const; 60 61private: 62 unsigned m_size; 63 Vector<unsigned, 1> m_words; 64}; 65 66class TextIteratorCopyableText { 67public: 68 TextIteratorCopyableText() 69 : m_singleCharacter(0) 70 , m_offset(0) 71 , m_length(0) 72 { 73 } 74 75 StringView text() const { return m_singleCharacter ? StringView(&m_singleCharacter, 1) : StringView(m_string).substring(m_offset, m_length); } 76 void appendToStringBuilder(StringBuilder&) const; 77 78 void reset(); 79 void set(String&&); 80 void set(String&&, unsigned offset, unsigned length); 81 void set(UChar); 82 83private: 84 UChar m_singleCharacter; 85 String m_string; 86 unsigned m_offset; 87 unsigned m_length; 88}; 89 90// Iterates through the DOM range, returning all the text, and 0-length boundaries 91// at points where replaced elements break up the text flow. The text is delivered in 92// the chunks it's already stored in, to avoid copying any text. 
93 94class TextIterator { 95public: 96 explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior); 97 ~TextIterator(); 98 99 bool atEnd() const { return !m_positionNode; } 100 void advance(); 101 102 StringView text() const { ASSERT(!atEnd()); return m_text; } 103 PassRefPtr<Range> range() const; 104 Node* node() const; 105 106 const TextIteratorCopyableText& copyableText() const { ASSERT(!atEnd()); return m_copyableText; } 107 void appendTextToStringBuilder(StringBuilder& builder) const { copyableText().appendToStringBuilder(builder); } 108 109 static int rangeLength(const Range*, bool spacesForReplacedElements = false); 110 static PassRefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false); 111 static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length); 112 static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount); 113 114private: 115 void exitNode(); 116 bool shouldRepresentNodeOffsetZero(); 117 bool shouldEmitSpaceBeforeAndAfterNode(Node&); 118 void representNodeOffsetZero(); 119 bool handleTextNode(); 120 bool handleReplacedElement(); 121 bool handleNonTextNode(); 122 void handleTextBox(); 123 void handleTextNodeFirstLetter(RenderTextFragment&); 124 void emitCharacter(UChar, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset); 125 void emitText(Text& textNode, RenderText&, int textStartOffset, int textEndOffset); 126 127 const TextIteratorBehavior m_behavior; 128 129 // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree. 130 Node* m_node; 131 int m_offset; 132 bool m_handledNode; 133 bool m_handledChildren; 134 BitStack m_fullyClippedStack; 135 136 // The range. 
137 Node* m_startContainer; 138 int m_startOffset; 139 Node* m_endContainer; 140 int m_endOffset; 141 Node* m_pastEndNode; 142 143 // The current text and its position, in the form to be returned from the iterator. 144 Node* m_positionNode; 145 mutable Node* m_positionOffsetBaseNode; 146 mutable int m_positionStartOffset; 147 mutable int m_positionEndOffset; 148 TextIteratorCopyableText m_copyableText; 149 StringView m_text; 150 151 // Used when there is still some pending text from the current node; when these are false and null, we go back to normal iterating. 152 bool m_needsAnotherNewline; 153 InlineTextBox* m_textBox; 154 155 // Used when iterating over :first-letter text to save pointer to remaining text box. 156 InlineTextBox* m_remainingTextBox; 157 158 // Used to point to RenderText object for :first-letter. 159 RenderText* m_firstLetterText; 160 161 // Used to do the whitespace collapsing logic. 162 Text* m_lastTextNode; 163 bool m_lastTextNodeEndedWithCollapsedSpace; 164 UChar m_lastCharacter; 165 166 // Used when text boxes are out of order (Hebrew/Arabic with embedded LTR text) 167 Vector<InlineTextBox*> m_sortedTextBoxes; 168 size_t m_sortedTextBoxesPosition; 169 170 // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content 171 bool m_hasEmitted; 172 173 // Used when deciding text fragment created by :first-letter should be looked into. 174 bool m_handledFirstLetter; 175}; 176 177// Iterates through the DOM range, returning all the text, and 0-length boundaries 178// at points where replaced elements break up the text flow. The text comes back in 179// chunks so as to optimize for performance of the iteration. 
180class SimplifiedBackwardsTextIterator { 181public: 182 explicit SimplifiedBackwardsTextIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior); 183 184 bool atEnd() const { return !m_positionNode; } 185 void advance(); 186 187 StringView text() const { ASSERT(!atEnd()); return m_text; } 188 PassRefPtr<Range> range() const; 189 Node* node() const { ASSERT(!atEnd()); return m_node; } 190 191private: 192 void exitNode(); 193 bool handleTextNode(); 194 RenderText* handleFirstLetter(int& startOffset, int& offsetInNode); 195 bool handleReplacedElement(); 196 bool handleNonTextNode(); 197 void emitCharacter(UChar, Node&, int startOffset, int endOffset); 198 bool advanceRespectingRange(Node*); 199 200 const TextIteratorBehavior m_behavior; 201 202 // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree. 203 Node* m_node; 204 int m_offset; 205 bool m_handledNode; 206 bool m_handledChildren; 207 BitStack m_fullyClippedStack; 208 209 // The range. 210 Node* m_startContainer; 211 int m_startOffset; 212 Node* m_endContainer; 213 int m_endOffset; 214 215 // The current text and its position, in the form to be returned from the iterator. 216 Node* m_positionNode; 217 int m_positionStartOffset; 218 int m_positionEndOffset; 219 TextIteratorCopyableText m_copyableText; 220 StringView m_text; 221 222 // Used to do the whitespace logic. 223 Text* m_lastTextNode; 224 UChar m_lastCharacter; 225 226 // Whether m_node has advanced beyond the iteration range (i.e. m_startContainer). 227 bool m_havePassedStartContainer; 228 229 // Should handle first-letter renderer in the next call to handleTextNode. 230 bool m_shouldHandleFirstLetter; 231}; 232 233// Builds on the text iterator, adding a character position so we can walk one 234// character at a time, or faster, as needed. Useful for searching. 
235class CharacterIterator { 236public: 237 explicit CharacterIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior); 238 239 bool atEnd() const { return m_underlyingIterator.atEnd(); } 240 void advance(int numCharacters); 241 242 StringView text() const { return m_underlyingIterator.text().substring(m_runOffset); } 243 PassRefPtr<Range> range() const; 244 245 bool atBreak() const { return m_atBreak; } 246 int characterOffset() const { return m_offset; } 247 248private: 249 TextIterator m_underlyingIterator; 250 251 int m_offset; 252 int m_runOffset; 253 bool m_atBreak; 254}; 255 256class BackwardsCharacterIterator { 257public: 258 explicit BackwardsCharacterIterator(const Range&); 259 260 bool atEnd() const { return m_underlyingIterator.atEnd(); } 261 void advance(int numCharacters); 262 263 PassRefPtr<Range> range() const; 264 265private: 266 SimplifiedBackwardsTextIterator m_underlyingIterator; 267 268 int m_offset; 269 int m_runOffset; 270 bool m_atBreak; 271}; 272 273// Similar to the TextIterator, except that the chunks of text returned are "well behaved", meaning 274// they never split up a word. This is useful for spell checking and perhaps one day for searching as well. 275class WordAwareIterator { 276public: 277 explicit WordAwareIterator(const Range&); 278 279 bool atEnd() const { return !m_didLookAhead && m_underlyingIterator.atEnd(); } 280 void advance(); 281 282 StringView text() const; 283 284private: 285 TextIterator m_underlyingIterator; 286 287 // Text from the previous chunk from the text iterator. 288 TextIteratorCopyableText m_previousText; 289 290 // Many chunks from text iterator concatenated. 291 Vector<UChar> m_buffer; 292 293 // Did we have to look ahead in the text iterator to confirm the current chunk? 294 bool m_didLookAhead; 295}; 296 297} 298 299#endif 300