1/*
2 * Copyright (C) 2004, 2006, 2009, 2014 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef TextIterator_h
27#define TextIterator_h
28
29// FIXME: Move each iterator class into a separate header file.
30
31#include "FindOptions.h"
32#include "Range.h"
33#include "TextIteratorBehavior.h"
34#include <wtf/Vector.h>
35#include <wtf/text/StringView.h>
36
37namespace WebCore {
38
39class InlineTextBox;
40class RenderText;
41class RenderTextFragment;
42
// Free-function helpers over a Range; the definitions are not in this header.
// plainText and plainTextReplacingNoBreakSpace take the same parameters with the same
// defaults (TextIteratorDefaultBehavior, isDisplayString = false) and both return a String.
// NOTE(review): judging by the name only, the second variant differs in how no-break spaces
// are treated -- confirm against the implementation.
43String plainText(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
44String plainTextReplacingNoBreakSpace(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior, bool isDisplayString = false);
// Takes a Range by reference, a String and FindOptions, and returns a Range.
// NOTE(review): presumably the range to search and the string to look for; what is returned
// when nothing matches is not visible from this declaration -- check the implementation.
45PassRefPtr<Range> findPlainText(const Range&, const String&, FindOptions);
46
47// FIXME: Move this somewhere else in the editing directory. It doesn't belong here.
// Boolean predicate on a RenderObject pointer; declared here, defined elsewhere.
// NOTE(review): null handling is not visible from this declaration -- confirm before passing null.
48bool isRendererReplacedElement(RenderObject*);
49
// A stack of bool values with push/pop/top/size operations. All member functions are
// defined out of line. Both iterator classes in this header hold one as m_fullyClippedStack.
50class BitStack {
51public:
52    BitStack();
53    ~BitStack();
54
    // Mutators: push adds a bool, pop removes one.
    // NOTE(review): behavior of pop()/top() on an empty stack is not visible here -- confirm.
55    void push(bool);
56    void pop();
57
    // Read-only accessors; neither modifies the stack (both are const).
58    bool top() const;
59    unsigned size() const;
60
61private:
    // NOTE(review): presumably m_size counts the pushed bits and m_words stores them packed
    // into unsigned words -- confirm against the implementation.
62    unsigned m_size;
63    Vector<unsigned, 1> m_words;
64};
65
// Holds one piece of text in one of two forms: a single UChar, or a String together with
// an (offset, length) window into it. text() exposes whichever form is active as a StringView.
66class TextIteratorCopyableText {
67public:
    // Starts with no single character and a zero-length window at offset 0.
68    TextIteratorCopyableText()
69        : m_singleCharacter(0)
70        , m_offset(0)
71        , m_length(0)
72    {
73    }
74
    // If m_singleCharacter is non-zero, returns a one-character view pointing at that member;
    // otherwise returns the [m_offset, m_offset + m_length) window of m_string.
    // The single-character view refers to storage inside this object, so it is only usable
    // while this object is alive and unchanged.
    // NOTE(review): a stored character value of 0 is indistinguishable from "no single
    // character" here and falls through to the string branch -- confirm set(UChar) never
    // receives 0.
75    StringView text() const { return m_singleCharacter ? StringView(&m_singleCharacter, 1) : StringView(m_string).substring(m_offset, m_length); }
76    void appendToStringBuilder(StringBuilder&) const;
77
    // Mutators, defined out of line. The String overloads take their argument by rvalue
    // reference; the three-argument form also takes an offset and length.
    // NOTE(review): presumably each set() replaces the previous contents and reset() clears
    // them -- confirm against the implementation.
78    void reset();
79    void set(String&&);
80    void set(String&&, unsigned offset, unsigned length);
81    void set(UChar);
82
83private:
    // Non-zero selects the single-character form in text().
84    UChar m_singleCharacter;
    // Backing string and the window into it used by text() when m_singleCharacter is zero.
85    String m_string;
86    unsigned m_offset;
87    unsigned m_length;
88};
89
90// Iterates through the DOM range, returning all the text, and 0-length boundaries
91// at points where replaced elements break up the text flow. The text is delivered in
92// the chunks it's already stored in, to avoid copying any text.
93
// Usage visible from this interface: construct from a Range, then loop while !atEnd(),
// reading text()/range()/node() and calling advance(). text() and copyableText() assert
// that the iterator is not at the end.
94class TextIterator {
95public:
96    explicit TextIterator(const Range*, TextIteratorBehavior = TextIteratorDefaultBehavior);
97    ~TextIterator();
98
    // The iterator is at the end once m_positionNode is null.
99    bool atEnd() const { return !m_positionNode; }
100    void advance();
101
    // Returns the current chunk (m_text). Must not be called when atEnd().
102    StringView text() const { ASSERT(!atEnd()); return m_text; }
103    PassRefPtr<Range> range() const;
104    Node* node() const;
105
    // Returns the current chunk as a TextIteratorCopyableText. Must not be called when atEnd().
106    const TextIteratorCopyableText& copyableText() const { ASSERT(!atEnd()); return m_copyableText; }
    // Forwards to copyableText().appendToStringBuilder(), so the same !atEnd() assertion applies.
107    void appendTextToStringBuilder(StringBuilder& builder) const { copyableText().appendToStringBuilder(builder); }
108
    // Static helpers converting between ranges and integer location/length pairs; defined out of line.
    // NOTE(review): the units of the int/size_t arguments (presumably character counts of the
    // iterated text) are not visible here -- confirm against the implementation.
109    static int rangeLength(const Range*, bool spacesForReplacedElements = false);
110    static PassRefPtr<Range> rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool spacesForReplacedElements = false);
111    static bool getLocationAndLengthFromRange(Node* scope, const Range*, size_t& location, size_t& length);
112    static PassRefPtr<Range> subrange(Range* entireRange, int characterOffset, int characterCount);
113
114private:
    // Internal traversal steps; all defined out of line.
115    void exitNode();
116    bool shouldRepresentNodeOffsetZero();
117    bool shouldEmitSpaceBeforeAndAfterNode(Node&);
118    void representNodeOffsetZero();
119    bool handleTextNode();
120    bool handleReplacedElement();
121    bool handleNonTextNode();
122    void handleTextBox();
123    void handleTextNodeFirstLetter(RenderTextFragment&);
124    void emitCharacter(UChar, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset);
125    void emitText(Text& textNode, RenderText&, int textStartOffset, int textEndOffset);
126
    // Behavior flags; const, so fixed for the iterator's lifetime.
127    const TextIteratorBehavior m_behavior;
128
129    // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree.
130    Node* m_node;
131    int m_offset;
132    bool m_handledNode;
133    bool m_handledChildren;
134    BitStack m_fullyClippedStack;
135
136    // The range.
137    Node* m_startContainer;
138    int m_startOffset;
139    Node* m_endContainer;
140    int m_endOffset;
141    Node* m_pastEndNode;
142
143    // The current text and its position, in the form to be returned from the iterator.
    // m_positionNode doubles as the end marker: atEnd() is true when it is null.
    // The three mutable members can be modified from const member functions.
    // NOTE(review): presumably range() is the const function that updates them -- confirm.
144    Node* m_positionNode;
145    mutable Node* m_positionOffsetBaseNode;
146    mutable int m_positionStartOffset;
147    mutable int m_positionEndOffset;
148    TextIteratorCopyableText m_copyableText;
149    StringView m_text;
150
151    // Used when there is still some pending text from the current node; when these are false and null, we go back to normal iterating.
152    bool m_needsAnotherNewline;
153    InlineTextBox* m_textBox;
154
155    // Used when iterating over :first-letter text to save pointer to remaining text box.
156    InlineTextBox* m_remainingTextBox;
157
158    // Used to point to RenderText object for :first-letter.
159    RenderText* m_firstLetterText;
160
161    // Used to do the whitespace collapsing logic.
162    Text* m_lastTextNode;
163    bool m_lastTextNodeEndedWithCollapsedSpace;
164    UChar m_lastCharacter;
165
166    // Used when text boxes are out of order (Hebrew/Arabic with embedded LTR text)
167    Vector<InlineTextBox*> m_sortedTextBoxes;
168    size_t m_sortedTextBoxesPosition;
169
170    // Used when deciding whether to emit a "positioning" (e.g. newline) before any other content
171    bool m_hasEmitted;
172
173    // Used when deciding text fragment created by :first-letter should be looked into.
174    bool m_handledFirstLetter;
175};
176
177// Iterates backwards through the DOM range, returning all the text, and 0-length boundaries
178// at points where replaced elements break up the text flow. The text comes back in
179// chunks so as to optimize for performance of the iteration.
// Interface mirrors TextIterator (atEnd/advance/text/range/node), but the constructor takes
// the Range by reference rather than by pointer.
180class SimplifiedBackwardsTextIterator {
181public:
182    explicit SimplifiedBackwardsTextIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior);
183
    // The iterator is at the end once m_positionNode is null.
184    bool atEnd() const { return !m_positionNode; }
185    void advance();
186
    // Returns the current chunk (m_text). Must not be called when atEnd().
187    StringView text() const { ASSERT(!atEnd()); return m_text; }
188    PassRefPtr<Range> range() const;
    // Returns m_node, i.e. the traversal position, not m_positionNode. Must not be called when atEnd().
189    Node* node() const { ASSERT(!atEnd()); return m_node; }
190
191private:
    // Internal traversal steps; all defined out of line.
192    void exitNode();
193    bool handleTextNode();
194    RenderText* handleFirstLetter(int& startOffset, int& offsetInNode);
195    bool handleReplacedElement();
196    bool handleNonTextNode();
197    void emitCharacter(UChar, Node&, int startOffset, int endOffset);
198    bool advanceRespectingRange(Node*);
199
    // Behavior flags; const, so fixed for the iterator's lifetime.
200    const TextIteratorBehavior m_behavior;
201
202    // Current position, not necessarily of the text being returned, but position as we walk through the DOM tree.
203    Node* m_node;
204    int m_offset;
205    bool m_handledNode;
206    bool m_handledChildren;
207    BitStack m_fullyClippedStack;
208
209    // The range.
210    Node* m_startContainer;
211    int m_startOffset;
212    Node* m_endContainer;
213    int m_endOffset;
214
215    // The current text and its position, in the form to be returned from the iterator.
    // m_positionNode doubles as the end marker: atEnd() is true when it is null.
216    Node* m_positionNode;
217    int m_positionStartOffset;
218    int m_positionEndOffset;
219    TextIteratorCopyableText m_copyableText;
220    StringView m_text;
221
222    // Used to do the whitespace logic.
223    Text* m_lastTextNode;
224    UChar m_lastCharacter;
225
226    // Whether m_node has advanced beyond the iteration range (i.e. m_startContainer).
227    bool m_havePassedStartContainer;
228
229    // Should handle first-letter renderer in the next call to handleTextNode.
230    bool m_shouldHandleFirstLetter;
231};
232
233// Builds on the text iterator, adding a character position so we can walk one
234// character at a time, or faster, as needed. Useful for searching.
// Wraps a TextIterator (m_underlyingIterator) and tracks an offset into its current chunk.
235class CharacterIterator {
236public:
237    explicit CharacterIterator(const Range&, TextIteratorBehavior = TextIteratorDefaultBehavior);
238
    // At the end exactly when the wrapped TextIterator is.
239    bool atEnd() const { return m_underlyingIterator.atEnd(); }
    // NOTE(review): presumably moves forward by numCharacters, crossing chunk boundaries as
    // needed; handling of non-positive values is not visible here -- confirm.
240    void advance(int numCharacters);
241
    // The wrapped iterator's current chunk starting at m_runOffset. The wrapped text()
    // asserts !atEnd(), so this must not be called at the end either.
242    StringView text() const { return m_underlyingIterator.text().substring(m_runOffset); }
243    PassRefPtr<Range> range() const;
244
    // Plain accessors for m_atBreak and m_offset.
245    bool atBreak() const { return m_atBreak; }
246    int characterOffset() const { return m_offset; }
247
248private:
249    TextIterator m_underlyingIterator;
250
    // m_offset is what characterOffset() reports; m_runOffset is the start index applied to
    // the wrapped iterator's chunk in text(); m_atBreak is what atBreak() reports.
    // NOTE(review): presumably m_offset counts characters consumed since construction -- confirm.
251    int m_offset;
252    int m_runOffset;
253    bool m_atBreak;
254};
255
// Counterpart of CharacterIterator that wraps a SimplifiedBackwardsTextIterator.
// Unlike CharacterIterator it takes no TextIteratorBehavior argument and exposes no
// text(), atBreak() or characterOffset() accessors.
256class BackwardsCharacterIterator {
257public:
258    explicit BackwardsCharacterIterator(const Range&);
259
    // At the end exactly when the wrapped SimplifiedBackwardsTextIterator is.
260    bool atEnd() const { return m_underlyingIterator.atEnd(); }
    // NOTE(review): presumably steps numCharacters through the wrapped (backwards) iterator;
    // handling of non-positive values is not visible here -- confirm.
261    void advance(int numCharacters);
262
263    PassRefPtr<Range> range() const;
264
265private:
266    SimplifiedBackwardsTextIterator m_underlyingIterator;
267
    // Same trio of bookkeeping fields as CharacterIterator; none is exposed through an
    // accessor in this class.
268    int m_offset;
269    int m_runOffset;
270    bool m_atBreak;
271};
272
273// Similar to the TextIterator, except that the chunks of text returned are "well behaved", meaning
274// they never split up a word. This is useful for spell checking and perhaps one day for searching as well.
// Wraps a TextIterator constructed from a Range passed by reference; takes no
// TextIteratorBehavior argument.
275class WordAwareIterator {
276public:
277    explicit WordAwareIterator(const Range&);
278
    // At the end only when the wrapped iterator is exhausted AND no look-ahead is pending
    // (m_didLookAhead is false).
279    bool atEnd() const { return !m_didLookAhead && m_underlyingIterator.atEnd(); }
280    void advance();
281
    // Defined out of line.
    // NOTE(review): presumably returns either the wrapped iterator's chunk, m_previousText,
    // or the contents of m_buffer depending on state -- confirm against the implementation.
282    StringView text() const;
283
284private:
285    TextIterator m_underlyingIterator;
286
287    // Text from the previous chunk from the text iterator.
288    TextIteratorCopyableText m_previousText;
289
290    // Many chunks from text iterator concatenated.
291    Vector<UChar> m_buffer;
292
293    // Did we have to look ahead in the text iterator to confirm the current chunk?
294    bool m_didLookAhead;
295};
296
297}
298
299#endif
300