1/* 2 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 */ 25 26#ifndef TextIterator_h 27#define TextIterator_h 28 29#include "FindOptions.h" 30#include "Range.h" 31#include <wtf/Vector.h> 32 33namespace WebCore { 34 35class InlineTextBox; 36class RenderText; 37class RenderTextFragment; 38 39enum TextIteratorBehavior { 40 TextIteratorDefaultBehavior = 0, 41 TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0, 42 TextIteratorEntersTextControls = 1 << 1, 43 TextIteratorEmitsTextsWithoutTranscoding = 1 << 2, 44 TextIteratorIgnoresStyleVisibility = 1 << 3, 45 TextIteratorEmitsObjectReplacementCharacters = 1 << 4, 46 TextIteratorEmitsOriginalText = 1 << 5, 47 TextIteratorStopsOnFormControls = 1 << 6, 48 TextIteratorEmitsImageAltText = 1 << 7, 49}; 50 51// FIXME: Can't really answer this question correctly without knowing the white-space mode. 52// FIXME: Move this somewhere else in the editing directory. It doesn't belong here. 53inline bool isCollapsibleWhitespace(UChar c) 54{ 55 switch (c) { 56 case ' ': 57 case '\n': 58 return true; 59 default: 60 return false; 61 } 62} 63 64String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false); 65PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions); 66bool isRendererReplacedElement(RenderObject*); 67 68class BitStack { 69public: 70 BitStack(); 71 ~BitStack(); 72 73 void push(bool); 74 void pop(); 75 76 bool top() const; 77 unsigned size() const; 78 79private: 80 unsigned m_size; 81 Vector<unsigned, 1> m_words; 82}; 83 84// Iterates through the DOM range, returning all the text, and 0-length boundaries 85// at points where replaced elements break up the text flow. The text comes back in 86// chunks so as to optimize for performance of the iteration. 
class TextIterator {
public:
    explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
    ~TextIterator();

    // True when there is no current position node, or when m_shouldStop has been set.
    bool atEnd() const { return !m_positionNode || m_shouldStop; }
    void advance();

    // Length of the text chunk currently being returned.
    int length() const { return m_textLength; }
    // Characters of the current chunk: m_textCharacters when it is non-null, otherwise
    // the contents of m_text starting at startOffset().
    const UChar* characters() const { return m_textCharacters ? m_textCharacters : m_text.characters() + startOffset(); }
    UChar characterAt(unsigned index) const;
    void appendTextToStringBuilder(StringBuilder&) const;

    PassRefPtr<Range> range() const;
    Node* node() const;

    // Static helpers converting between ranges and character locations/lengths.
    // NOTE(review): their exact semantics are defined in the implementation file, not here.
    static int rangeLength(const Range*, bool spacesForReplacedElements = false);
    static PassRefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
    static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length);
    static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);

private:
    // Start offset of the current text; used by characters() to index into m_text.
    int startOffset() const { return m_positionStartOffset; }
    const String& string() const { return m_text; }
    void exitNode();
    bool shouldRepresentNodeOffsetZero();
    bool shouldEmitSpaceBeforeAndAfterNode(Node*);
    void representNodeOffsetZero();
    bool handleTextNode();
    bool handleReplacedElement();
    bool handleNonTextNode();
    void handleTextBox();
    void handleTextNodeFirstLetter(RenderTextFragment*);
    bool hasVisibleTextNode(RenderText*);
    void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
    void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
    void emitText(Node* textNode, int textStartOffset, int textEndOffset);

    // Current position, not necessarily of the text being returned, but position
    // as we walk through the DOM tree.
    Node* m_node;
    int m_offset;
    bool m_handledNode;
    bool m_handledChildren;
    BitStack m_fullyClippedStack;

    // The range.
    Node* m_startContainer;
    int m_startOffset;
    Node* m_endContainer;
    int m_endOffset;
    Node* m_pastEndNode;

    // The current text and its position, in the form to be returned from the iterator.
    // Several of these are mutable, so const member functions are able to modify them.
    Node* m_positionNode;
    mutable Node* m_positionOffsetBaseNode;
    mutable int m_positionStartOffset;
    mutable int m_positionEndOffset;
    const UChar* m_textCharacters; // If null, then use m_text for character data.
    int m_textLength;
    // Hold string m_textCharacters points to so we ensure it won't be deleted.
    String m_text;

    // Used when there is still some pending text from the current node; when these
    // are false and 0, we go back to normal iterating.
    bool m_needsAnotherNewline;
    InlineTextBox* m_textBox;
    // Used when iterating over :first-letter text to save a pointer to the
    // remaining text box.
    InlineTextBox* m_remainingTextBox;
    // Used to point to RenderText object for :first-letter.
    RenderText *m_firstLetterText;

    // Used to do the whitespace collapsing logic.
    Node* m_lastTextNode;
    bool m_lastTextNodeEndedWithCollapsedSpace;
    UChar m_lastCharacter;

    // Used for whitespace characters that aren't in the DOM, so we can point at them.
    UChar m_singleCharacterBuffer;

    // Used when text boxes are out of order (Hebrew/Arabic w/ embedded LTR text)
    Vector<InlineTextBox*> m_sortedTextBoxes;
    size_t m_sortedTextBoxesPosition;

    // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
    bool m_hasEmitted;

    // Used by selection preservation code. There should be one character emitted between every VisiblePosition
    // in the Range used to create the TextIterator.
    // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
    // moveParagraphs to not clone/destroy moved content.
    bool m_emitsCharactersBetweenAllVisiblePositions;
    bool m_entersTextControls;

    // Used when we want texts for copying, pasting, and transposing.
    bool m_emitsTextWithoutTranscoding;
    // Used in pasting inside password field.
    bool m_emitsOriginalText;
    // Used when deciding text fragment created by :first-letter should be looked into.
    bool m_handledFirstLetter;
    // Used when the visibility of the style should not affect text gathering.
    bool m_ignoresStyleVisibility;
    // Used when emitting the special 0xFFFC character is required.
    bool m_emitsObjectReplacementCharacters;
    // Used when the iteration should stop if form controls are reached.
    bool m_stopsOnFormControls;
    // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    bool m_shouldStop;

    // NOTE(review): presumably mirrors the TextIteratorEmitsImageAltText behavior flag; confirm in the constructor.
    bool m_emitsImageAltText;
};

// Iterates through the DOM range, returning all the text, and 0-length boundaries
// at points where replaced elements break up the text flow. The text comes back in
// chunks so as to optimize for performance of the iteration.
class SimplifiedBackwardsTextIterator {
public:
    explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);

    // True when there is no current position node, or when m_shouldStop has been set.
    bool atEnd() const { return !m_positionNode || m_shouldStop; }
    void advance();

    // Length of, and pointer to, the text chunk currently being returned.
    int length() const { return m_textLength; }
    const UChar* characters() const { return m_textCharacters; }

    PassRefPtr<Range> range() const;

private:
    void exitNode();
    bool handleTextNode();
    RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
    bool handleReplacedElement();
    bool handleNonTextNode();
    void emitCharacter(UChar, Node*, int startOffset, int endOffset);
    bool advanceRespectingRange(Node*);

    // Current position, not necessarily of the text being returned, but position
    // as we walk through the DOM tree.
    Node* m_node;
    int m_offset;
    bool m_handledNode;
    bool m_handledChildren;
    BitStack m_fullyClippedStack;

    // NOTE(review): the two labels below look swapped relative to the member names;
    // presumably they describe the order in which a backwards walk reaches each
    // boundary. Confirm against the constructor.
    // End of the range.
    Node* m_startNode;
    int m_startOffset;
    // Start of the range.
    Node* m_endNode;
    int m_endOffset;

    // The current text and its position, in the form to be returned from the iterator.
    Node* m_positionNode;
    int m_positionStartOffset;
    int m_positionEndOffset;
    const UChar* m_textCharacters;
    int m_textLength;

    // Used to do the whitespace logic.
    Node* m_lastTextNode;
    UChar m_lastCharacter;

    // Used for whitespace characters that aren't in the DOM, so we can point at them.
    UChar m_singleCharacterBuffer;

    // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
    bool m_havePassedStartNode;

    // Should handle first-letter renderer in the next call to handleTextNode.
    bool m_shouldHandleFirstLetter;

    // Used when the iteration should stop if form controls are reached.
    bool m_stopsOnFormControls;

    // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
    bool m_shouldStop;

    // Used in pasting inside password field.
    bool m_emitsOriginalText;
};

// Builds on the text iterator, adding a character position so we can walk one
// character at a time, or faster, as needed. Useful for searching.
class CharacterIterator {
public:
    explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);

    void advance(int numCharacters);

    bool atBreak() const { return m_atBreak; }
    // Forwards to the wrapped TextIterator.
    bool atEnd() const { return m_textIterator.atEnd(); }

    // The wrapped iterator's chunk length minus m_runOffset.
    int length() const { return m_textIterator.length() - m_runOffset; }
    // The wrapped iterator's chunk pointer advanced by m_runOffset.
    const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
    String string(int numChars);

    int characterOffset() const { return m_offset; }
    PassRefPtr<Range> range() const;

private:
    int m_offset; // Value reported by characterOffset().
    int m_runOffset; // Offset applied to the wrapped iterator's current chunk.
    bool m_atBreak;

    TextIterator m_textIterator;
};

// Character-position wrapper around SimplifiedBackwardsTextIterator.
// NOTE(review): presumably the backwards counterpart of CharacterIterator; confirm in the implementation.
class BackwardsCharacterIterator {
public:
    explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);

    void advance(int);

    // Forwards to the wrapped SimplifiedBackwardsTextIterator.
    bool atEnd() const { return m_textIterator.atEnd(); }

    PassRefPtr<Range> range() const;

private:
    int m_offset;
    int m_runOffset;
    bool m_atBreak;

    SimplifiedBackwardsTextIterator m_textIterator;
};

// Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
// meaning they never split up a word. This is useful for spellcheck or (perhaps one day) searching.
class WordAwareIterator {
public:
    explicit WordAwareIterator(const Range*);
    ~WordAwareIterator();

    // At the end only when m_didLookAhead is false and the wrapped TextIterator is at its end.
    bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
    void advance();

    int length() const;
    const UChar* characters() const;

    // Range of the text we're currently returning
    PassRefPtr<Range> range() const { return m_range; }

private:
    // text from the previous chunk from the textIterator
    const UChar* m_previousText;
    int m_previousLength;

    // many chunks from textIterator concatenated
    Vector<UChar> m_buffer;

    // Did we have to look ahead in the textIterator to confirm the current chunk?
    bool m_didLookAhead;

    // Backing storage for range().
    RefPtr<Range> m_range;

    TextIterator m_textIterator;
};

} // namespace WebCore

#endif // TextIterator_h