/*
 * Copyright (C) 2004, 2006, 2009 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25
26#ifndef TextIterator_h
27#define TextIterator_h
28
29#include "FindOptions.h"
30#include "Range.h"
31#include <wtf/Vector.h>
32
33namespace WebCore {
34
// Forward declarations: this header only refers to these types through pointers,
// so their full definitions are not needed here.
class InlineTextBox;
class RenderText;
class RenderTextFragment;
38
// Option flags accepted by the iterator constructors and by plainText().
// TextIteratorDefaultBehavior is zero; every other enumerator occupies its own
// bit (bits 0 through 7), so the values never overlap.
// NOTE(review): the iterator classes keep similarly named m_* booleans; the
// mapping from these flags to those members lives in the implementation file
// and should be confirmed there.
enum TextIteratorBehavior {
    TextIteratorDefaultBehavior = 0,
    TextIteratorEmitsCharactersBetweenAllVisiblePositions = 1 << 0,
    TextIteratorEntersTextControls = 1 << 1,
    TextIteratorEmitsTextsWithoutTranscoding = 1 << 2,
    TextIteratorIgnoresStyleVisibility = 1 << 3,
    TextIteratorEmitsObjectReplacementCharacters = 1 << 4,
    TextIteratorEmitsOriginalText = 1 << 5,
    TextIteratorStopsOnFormControls = 1 << 6,
    TextIteratorEmitsImageAltText = 1 << 7,
};
50
51// FIXME: Can't really answer this question correctly without knowing the white-space mode.
52// FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
53inline bool isCollapsibleWhitespace(UChar c)
54{
55    switch (c) {
56        case ' ':
57        case '\n':
58            return true;
59        default:
60            return false;
61    }
62}
63
64String plainText(const Range*, TextIteratorBehavior defaultBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
65PassRefPtr<Range> findPlainText(const Range*, const String&, FindOptions);
66bool isRendererReplacedElement(RenderObject*);
67
68class BitStack {
69public:
70    BitStack();
71    ~BitStack();
72
73    void push(bool);
74    void pop();
75
76    bool top() const;
77    unsigned size() const;
78
79private:
80    unsigned m_size;
81    Vector<unsigned, 1> m_words;
82};
83
84// Iterates through the DOM range, returning all the text, and 0-length boundaries
85// at points where replaced elements break up the text flow.  The text comes back in
86// chunks so as to optimize for performance of the iteration.
87
88class TextIterator {
89public:
90    explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
91    ~TextIterator();
92
93    bool atEnd() const { return !m_positionNode || m_shouldStop; }
94    void advance();
95
96    int length() const { return m_textLength; }
97    const UChar* characters() const { return m_textCharacters ? m_textCharacters : m_text.characters() + startOffset(); }
98    UChar characterAt(unsigned index) const;
99    void appendTextToStringBuilder(StringBuilder&) const;
100
101    PassRefPtr<Range> range() const;
102    Node* node() const;
103
104    static int rangeLength(const Range*, bool spacesForReplacedElements = false);
105    static PassRefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
106    static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length);
107    static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
108
109private:
110    int startOffset() const { return m_positionStartOffset; }
111    const String& string() const { return m_text; }
112    void exitNode();
113    bool shouldRepresentNodeOffsetZero();
114    bool shouldEmitSpaceBeforeAndAfterNode(Node*);
115    void representNodeOffsetZero();
116    bool handleTextNode();
117    bool handleReplacedElement();
118    bool handleNonTextNode();
119    void handleTextBox();
120    void handleTextNodeFirstLetter(RenderTextFragment*);
121    bool hasVisibleTextNode(RenderText*);
122    void emitCharacter(UChar, Node* textNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
123    void emitText(Node* textNode, RenderObject* renderObject, int textStartOffset, int textEndOffset);
124    void emitText(Node* textNode, int textStartOffset, int textEndOffset);
125
126    // Current position, not necessarily of the text being returned, but position
127    // as we walk through the DOM tree.
128    Node* m_node;
129    int m_offset;
130    bool m_handledNode;
131    bool m_handledChildren;
132    BitStack m_fullyClippedStack;
133
134    // The range.
135    Node* m_startContainer;
136    int m_startOffset;
137    Node* m_endContainer;
138    int m_endOffset;
139    Node* m_pastEndNode;
140
141    // The current text and its position, in the form to be returned from the iterator.
142    Node* m_positionNode;
143    mutable Node* m_positionOffsetBaseNode;
144    mutable int m_positionStartOffset;
145    mutable int m_positionEndOffset;
146    const UChar* m_textCharacters; // If null, then use m_text for character data.
147    int m_textLength;
148    // Hold string m_textCharacters points to so we ensure it won't be deleted.
149    String m_text;
150
151    // Used when there is still some pending text from the current node; when these
152    // are false and 0, we go back to normal iterating.
153    bool m_needsAnotherNewline;
154    InlineTextBox* m_textBox;
155    // Used when iteration over :first-letter text to save pointer to
156    // remaining text box.
157    InlineTextBox* m_remainingTextBox;
158    // Used to point to RenderText object for :first-letter.
159    RenderText *m_firstLetterText;
160
161    // Used to do the whitespace collapsing logic.
162    Node* m_lastTextNode;
163    bool m_lastTextNodeEndedWithCollapsedSpace;
164    UChar m_lastCharacter;
165
166    // Used for whitespace characters that aren't in the DOM, so we can point at them.
167    UChar m_singleCharacterBuffer;
168
169    // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
170    Vector<InlineTextBox*> m_sortedTextBoxes;
171    size_t m_sortedTextBoxesPosition;
172
173    // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
174    bool m_hasEmitted;
175
176    // Used by selection preservation code.  There should be one character emitted between every VisiblePosition
177    // in the Range used to create the TextIterator.
178    // FIXME <rdar://problem/6028818>: This functionality should eventually be phased out when we rewrite
179    // moveParagraphs to not clone/destroy moved content.
180    bool m_emitsCharactersBetweenAllVisiblePositions;
181    bool m_entersTextControls;
182
183    // Used when we want texts for copying, pasting, and transposing.
184    bool m_emitsTextWithoutTranscoding;
185    // Used in pasting inside password field.
186    bool m_emitsOriginalText;
187    // Used when deciding text fragment created by :first-letter should be looked into.
188    bool m_handledFirstLetter;
189    // Used when the visibility of the style should not affect text gathering.
190    bool m_ignoresStyleVisibility;
191    // Used when emitting the special 0xFFFC character is required.
192    bool m_emitsObjectReplacementCharacters;
193    // Used when the iteration should stop if form controls are reached.
194    bool m_stopsOnFormControls;
195    // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
196    bool m_shouldStop;
197
198    bool m_emitsImageAltText;
199};
200
201// Iterates through the DOM range, returning all the text, and 0-length boundaries
202// at points where replaced elements break up the text flow. The text comes back in
203// chunks so as to optimize for performance of the iteration.
204class SimplifiedBackwardsTextIterator {
205public:
206    explicit SimplifiedBackwardsTextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
207
208    bool atEnd() const { return !m_positionNode || m_shouldStop; }
209    void advance();
210
211    int length() const { return m_textLength; }
212    const UChar* characters() const { return m_textCharacters; }
213
214    PassRefPtr<Range> range() const;
215
216private:
217    void exitNode();
218    bool handleTextNode();
219    RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
220    bool handleReplacedElement();
221    bool handleNonTextNode();
222    void emitCharacter(UChar, Node*, int startOffset, int endOffset);
223    bool advanceRespectingRange(Node*);
224
225    // Current position, not necessarily of the text being returned, but position
226    // as we walk through the DOM tree.
227    Node* m_node;
228    int m_offset;
229    bool m_handledNode;
230    bool m_handledChildren;
231    BitStack m_fullyClippedStack;
232
233    // End of the range.
234    Node* m_startNode;
235    int m_startOffset;
236    // Start of the range.
237    Node* m_endNode;
238    int m_endOffset;
239
240    // The current text and its position, in the form to be returned from the iterator.
241    Node* m_positionNode;
242    int m_positionStartOffset;
243    int m_positionEndOffset;
244    const UChar* m_textCharacters;
245    int m_textLength;
246
247    // Used to do the whitespace logic.
248    Node* m_lastTextNode;
249    UChar m_lastCharacter;
250
251    // Used for whitespace characters that aren't in the DOM, so we can point at them.
252    UChar m_singleCharacterBuffer;
253
254    // Whether m_node has advanced beyond the iteration range (i.e. m_startNode).
255    bool m_havePassedStartNode;
256
257    // Should handle first-letter renderer in the next call to handleTextNode.
258    bool m_shouldHandleFirstLetter;
259
260    // Used when the iteration should stop if form controls are reached.
261    bool m_stopsOnFormControls;
262
263    // Used when m_stopsOnFormControls is set to determine if the iterator should keep advancing.
264    bool m_shouldStop;
265
266    // Used in pasting inside password field.
267    bool m_emitsOriginalText;
268};
269
270// Builds on the text iterator, adding a character position so we can walk one
271// character at a time, or faster, as needed. Useful for searching.
272class CharacterIterator {
273public:
274    explicit CharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
275
276    void advance(int numCharacters);
277
278    bool atBreak() const { return m_atBreak; }
279    bool atEnd() const { return m_textIterator.atEnd(); }
280
281    int length() const { return m_textIterator.length() - m_runOffset; }
282    const UChar* characters() const { return m_textIterator.characters() + m_runOffset; }
283    String string(int numChars);
284
285    int characterOffset() const { return m_offset; }
286    PassRefPtr<Range> range() const;
287
288private:
289    int m_offset;
290    int m_runOffset;
291    bool m_atBreak;
292
293    TextIterator m_textIterator;
294};
295
296class BackwardsCharacterIterator {
297public:
298    explicit BackwardsCharacterIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
299
300    void advance(int);
301
302    bool atEnd() const { return m_textIterator.atEnd(); }
303
304    PassRefPtr<Range> range() const;
305
306private:
307    int m_offset;
308    int m_runOffset;
309    bool m_atBreak;
310
311    SimplifiedBackwardsTextIterator m_textIterator;
312};
313
314// Very similar to the TextIterator, except that the chunks of text returned are "well behaved",
315// meaning they never end split up a word.  This is useful for spellcheck or (perhaps one day) searching.
316class WordAwareIterator {
317public:
318    explicit WordAwareIterator(const Range*);
319    ~WordAwareIterator();
320
321    bool atEnd() const { return !m_didLookAhead && m_textIterator.atEnd(); }
322    void advance();
323
324    int length() const;
325    const UChar* characters() const;
326
327    // Range of the text we're currently returning
328    PassRefPtr<Range> range() const { return m_range; }
329
330private:
331    // text from the previous chunk from the textIterator
332    const UChar* m_previousText;
333    int m_previousLength;
334
335    // many chunks from textIterator concatenated
336    Vector<UChar> m_buffer;
337
338    // Did we have to look ahead in the textIterator to confirm the current chunk?
339    bool m_didLookAhead;
340
341    RefPtr<Range> m_range;
342
343    TextIterator m_textIterator;
344};
345
346}
347
348#endif
349