1/*
2 * Copyright (C) 2004-2014 Apple Inc. All rights reserved.
3 * Copyright (C) 2005 Alexey Proskuryakov.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28#include "TextIterator.h"
29
30#include "Document.h"
31#include "ExceptionCodePlaceholder.h"
32#include "Font.h"
33#include "Frame.h"
34#include "HTMLElement.h"
35#include "HTMLNames.h"
36#include "HTMLTextFormControlElement.h"
37#include "InlineTextBox.h"
38#include "NodeTraversal.h"
39#include "RenderImage.h"
40#include "RenderIterator.h"
41#include "RenderTableCell.h"
42#include "RenderTableRow.h"
43#include "RenderTextControl.h"
44#include "RenderTextFragment.h"
45#include "ShadowRoot.h"
46#include "TextBoundaries.h"
47#include "TextBreakIterator.h"
48#include "TextControlInnerElements.h"
49#include "VisiblePosition.h"
50#include "VisibleUnits.h"
51#include "htmlediting.h"
52#include <wtf/text/CString.h>
53#include <wtf/text/StringBuilder.h>
54#include <wtf/unicode/CharacterNames.h>
55
56#if !UCONFIG_NO_COLLATION
57#include "TextBreakIteratorInternalICU.h"
58#include <unicode/usearch.h>
59#endif
60
61using namespace WTF::Unicode;
62
63namespace WebCore {
64
65using namespace HTMLNames;
66
67// Buffer that knows how to compare with a search target.
68// Keeps enough of the previous text to be able to search in the future, but no more.
69// Non-breaking spaces are always equal to normal spaces.
70// Case folding is also done if the CaseInsensitive option is specified.
71// Matches are further filtered if the AtWordStarts option is specified, although some
72// matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well.
73class SearchBuffer {
74    WTF_MAKE_NONCOPYABLE(SearchBuffer);
75public:
76    SearchBuffer(const String& target, FindOptions);
77    ~SearchBuffer();
78
79    // Returns number of characters appended; guaranteed to be in the range [1, length].
80    size_t append(StringView);
81    bool needsMoreContext() const;
82    void prependContext(StringView);
83    void reachedBreak();
84
85    // Result is the size in characters of what was found.
86    // And <startOffset> is the number of characters back to the start of what was found.
87    size_t search(size_t& startOffset);
88    bool atBreak() const;
89
90#if !UCONFIG_NO_COLLATION
91
92private:
93    bool isBadMatch(const UChar*, size_t length) const;
94    bool isWordStartMatch(size_t start, size_t length) const;
95    bool isWordEndMatch(size_t start, size_t length) const;
96
97    const String m_target;
98    const StringView::UpconvertedCharacters m_targetCharacters;
99    FindOptions m_options;
100
101    Vector<UChar> m_buffer;
102    size_t m_overlap;
103    size_t m_prefixLength;
104    bool m_atBreak;
105    bool m_needsMoreContext;
106
107    const bool m_targetRequiresKanaWorkaround;
108    Vector<UChar> m_normalizedTarget;
109    mutable Vector<UChar> m_normalizedMatch;
110
111#else
112
113private:
114    void append(UChar, bool isCharacterStart);
115    size_t length() const;
116
117    String m_target;
118    FindOptions m_options;
119
120    Vector<UChar> m_buffer;
121    Vector<bool> m_isCharacterStartBuffer;
122    bool m_isBufferFull;
123    size_t m_cursor;
124
125#endif
126};
127
128// --------
129
130static const unsigned bitsInWord = sizeof(unsigned) * 8;
131static const unsigned bitInWordMask = bitsInWord - 1;
132
133BitStack::BitStack()
134    : m_size(0)
135{
136}
137
138BitStack::~BitStack()
139{
140}
141
142void BitStack::push(bool bit)
143{
144    unsigned index = m_size / bitsInWord;
145    unsigned shift = m_size & bitInWordMask;
146    if (!shift && index == m_words.size()) {
147        m_words.grow(index + 1);
148        m_words[index] = 0;
149    }
150    unsigned& word = m_words[index];
151    unsigned mask = 1U << shift;
152    if (bit)
153        word |= mask;
154    else
155        word &= ~mask;
156    ++m_size;
157}
158
159void BitStack::pop()
160{
161    if (m_size)
162        --m_size;
163}
164
165bool BitStack::top() const
166{
167    if (!m_size)
168        return false;
169    unsigned shift = (m_size - 1) & bitInWordMask;
170    return m_words.last() & (1U << shift);
171}
172
173unsigned BitStack::size() const
174{
175    return m_size;
176}
177
178// --------
179
180#if !ASSERT_DISABLED
181
182static unsigned depthCrossingShadowBoundaries(Node& node)
183{
184    unsigned depth = 0;
185    for (Node* parent = node.parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
186        ++depth;
187    return depth;
188}
189
190#endif
191
192// This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees.
193static Node* nextInPreOrderCrossingShadowBoundaries(Node& rangeEndContainer, int rangeEndOffset)
194{
195    if (rangeEndOffset >= 0 && !rangeEndContainer.offsetInCharacters()) {
196        if (Node* next = rangeEndContainer.childNode(rangeEndOffset))
197            return next;
198    }
199    for (Node* node = &rangeEndContainer; node; node = node->parentOrShadowHostNode()) {
200        if (Node* next = node->nextSibling())
201            return next;
202    }
203    return nullptr;
204}
205
206static inline bool fullyClipsContents(Node& node)
207{
208    auto* renderer = node.renderer();
209    if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip())
210        return false;
211    return toRenderBox(renderer)->size().isEmpty();
212}
213
214static inline bool ignoresContainerClip(Node& node)
215{
216    auto* renderer = node.renderer();
217    if (!renderer || renderer->isTextOrLineBreak())
218        return false;
219    return renderer->style().hasOutOfFlowPosition();
220}
221
222static void pushFullyClippedState(BitStack& stack, Node& node)
223{
224    ASSERT(stack.size() == depthCrossingShadowBoundaries(node));
225
226    // Push true if this node full clips its contents, or if a parent already has fully
227    // clipped and this is not a node that ignores its container's clip.
228    stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node)));
229}
230
231static void setUpFullyClippedStack(BitStack& stack, Node& node)
232{
233    // Put the nodes in a vector so we can iterate in reverse order.
234    Vector<Node*, 100> ancestry;
235    for (Node* parent = node.parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode())
236        ancestry.append(parent);
237
238    // Call pushFullyClippedState on each node starting with the earliest ancestor.
239    size_t size = ancestry.size();
240    for (size_t i = 0; i < size; ++i)
241        pushFullyClippedState(stack, *ancestry[size - i - 1]);
242    pushFullyClippedState(stack, node);
243
244    ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node));
245}
246
247// FIXME: editingIgnoresContent and isRendererReplacedElement try to do the same job.
248// It's not good to have both of them.
249bool isRendererReplacedElement(RenderObject* renderer)
250{
251    if (!renderer)
252        return false;
253
254    if (renderer->isImage() || renderer->isWidget() || renderer->isMedia())
255        return true;
256
257    if (renderer->node() && renderer->node()->isElementNode()) {
258        Element& element = toElement(*renderer->node());
259        if (element.isFormControlElement() || element.hasTagName(legendTag) || element.hasTagName(meterTag) || element.hasTagName(progressTag))
260            return true;
261        if (equalIgnoringCase(element.fastGetAttribute(roleAttr), "img"))
262            return true;
263    }
264
265    return false;
266}
267
268// --------
269
270inline void TextIteratorCopyableText::reset()
271{
272    m_singleCharacter = 0;
273    m_string = String();
274    m_offset = 0;
275    m_length = 0;
276}
277
278inline void TextIteratorCopyableText::set(String&& string)
279{
280    m_singleCharacter = 0;
281    m_string = WTF::move(string);
282    m_offset = 0;
283    m_length = m_string.length();
284}
285
286inline void TextIteratorCopyableText::set(String&& string, unsigned offset, unsigned length)
287{
288    ASSERT(offset < string.length());
289    ASSERT(length);
290    ASSERT(length <= string.length() - offset);
291
292    m_singleCharacter = 0;
293    m_string = WTF::move(string);
294    m_offset = offset;
295    m_length = length;
296}
297
298inline void TextIteratorCopyableText::set(UChar singleCharacter)
299{
300    m_singleCharacter = singleCharacter;
301    m_string = String();
302    m_offset = 0;
303    m_length = 0;
304}
305
306void TextIteratorCopyableText::appendToStringBuilder(StringBuilder& builder) const
307{
308    if (m_singleCharacter)
309        builder.append(m_singleCharacter);
310    else
311        builder.append(m_string, m_offset, m_length);
312}
313
314// --------
315
316TextIterator::TextIterator(const Range* range, TextIteratorBehavior behavior)
317    : m_behavior(behavior)
318    , m_handledNode(false)
319    , m_handledChildren(false)
320    , m_startContainer(nullptr)
321    , m_startOffset(0)
322    , m_endContainer(nullptr)
323    , m_endOffset(0)
324    , m_positionNode(nullptr)
325    , m_needsAnotherNewline(false)
326    , m_textBox(nullptr)
327    , m_remainingTextBox(nullptr)
328    , m_firstLetterText(nullptr)
329    , m_lastTextNode(nullptr)
330    , m_lastTextNodeEndedWithCollapsedSpace(false)
331    , m_lastCharacter(0)
332    , m_sortedTextBoxesPosition(0)
333    , m_hasEmitted(false)
334    , m_handledFirstLetter(false)
335{
336    // FIXME: Only m_positionNode above needs to be initialized if range is null.
337    if (!range)
338        return;
339
340    range->ownerDocument().updateLayoutIgnorePendingStylesheets();
341
342    m_startContainer = range->startContainer();
343    if (!m_startContainer)
344        return;
345    ASSERT(range->endContainer());
346
347    // Callers should be handing us well-formed ranges. If we discover that this isn't
348    // the case, we could consider changing this assertion to an early return.
349    ASSERT(range->boundaryPointsValid());
350
351    m_startOffset = range->startOffset();
352    m_endContainer = range->endContainer();
353    m_endOffset = range->endOffset();
354
355    // Set up the current node for processing.
356    m_node = range->firstNode();
357    if (!m_node)
358        return;
359    setUpFullyClippedStack(m_fullyClippedStack, *m_node);
360    m_offset = m_node == m_startContainer ? m_startOffset : 0;
361
362    m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(*m_endContainer, m_endOffset);
363
364#ifndef NDEBUG
365    // Need this just because of the assert in advance().
366    m_positionNode = m_node;
367#endif
368
369    advance();
370}
371
372TextIterator::~TextIterator()
373{
374}
375
376void TextIterator::advance()
377{
378    ASSERT(!atEnd());
379
380    // reset the run information
381    m_positionNode = nullptr;
382    m_copyableText.reset();
383    m_text = StringView();
384
385    // handle remembered node that needed a newline after the text node's newline
386    if (m_needsAnotherNewline) {
387        // Emit the extra newline, and position it *inside* m_node, after m_node's
388        // contents, in case it's a block, in the same way that we position the first
389        // newline. The range for the emitted newline should start where the line
390        // break begins.
391        // FIXME: It would be cleaner if we emitted two newlines during the last
392        // iteration, instead of using m_needsAnotherNewline.
393        Node& baseNode = m_node->lastChild() ? *m_node->lastChild() : *m_node;
394        emitCharacter('\n', *baseNode.parentNode(), &baseNode, 1, 1);
395        m_needsAnotherNewline = false;
396        return;
397    }
398
399    if (!m_textBox && m_remainingTextBox) {
400        m_textBox = m_remainingTextBox;
401        m_remainingTextBox = nullptr;
402        m_firstLetterText = nullptr;
403        m_offset = 0;
404    }
405    // handle remembered text box
406    if (m_textBox) {
407        handleTextBox();
408        if (m_positionNode)
409            return;
410    }
411
412    while (m_node && m_node != m_pastEndNode) {
413        if ((m_behavior & TextIteratorStopsOnFormControls) && HTMLFormControlElement::enclosingFormControlElement(m_node))
414            return;
415
416        // if the range ends at offset 0 of an element, represent the
417        // position, but not the content, of that element e.g. if the
418        // node is a blockflow element, emit a newline that
419        // precedes the element
420        if (m_node == m_endContainer && !m_endOffset) {
421            representNodeOffsetZero();
422            m_node = nullptr;
423            return;
424        }
425
426        auto* renderer = m_node->renderer();
427        if (!renderer) {
428            m_handledNode = true;
429            m_handledChildren = true;
430        } else {
431            // handle current node according to its type
432            if (!m_handledNode) {
433                if (renderer->isText() && m_node->isTextNode())
434                    m_handledNode = handleTextNode();
435                else if (isRendererReplacedElement(renderer))
436                    m_handledNode = handleReplacedElement();
437                else
438                    m_handledNode = handleNonTextNode();
439                if (m_positionNode)
440                    return;
441            }
442        }
443
444        // find a new current node to handle in depth-first manner,
445        // calling exitNode() as we come back thru a parent node
446        Node* next = m_handledChildren ? nullptr : m_node->firstChild();
447        m_offset = 0;
448        if (!next) {
449            next = m_node->nextSibling();
450            if (!next) {
451                bool pastEnd = NodeTraversal::next(m_node) == m_pastEndNode;
452                Node* parentNode = m_node->parentOrShadowHostNode();
453                while (!next && parentNode) {
454                    if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode))
455                        return;
456                    bool haveRenderer = m_node->renderer();
457                    m_node = parentNode;
458                    m_fullyClippedStack.pop();
459                    parentNode = m_node->parentOrShadowHostNode();
460                    if (haveRenderer)
461                        exitNode();
462                    if (m_positionNode) {
463                        m_handledNode = true;
464                        m_handledChildren = true;
465                        return;
466                    }
467                    next = m_node->nextSibling();
468                }
469            }
470            m_fullyClippedStack.pop();
471        }
472
473        // set the new current node
474        m_node = next;
475        if (m_node)
476            pushFullyClippedState(m_fullyClippedStack, *m_node);
477        m_handledNode = false;
478        m_handledChildren = false;
479        m_handledFirstLetter = false;
480        m_firstLetterText = nullptr;
481
482        // how would this ever be?
483        if (m_positionNode)
484            return;
485    }
486}
487
488static bool hasVisibleTextNode(RenderText& renderer)
489{
490    if (renderer.style().visibility() == VISIBLE)
491        return true;
492    if (renderer.isTextFragment()) {
493        if (auto firstLetter = toRenderTextFragment(renderer).firstLetter()) {
494            if (firstLetter->style().visibility() == VISIBLE)
495                return true;
496        }
497    }
498    return false;
499}
500
501bool TextIterator::handleTextNode()
502{
503    Text& textNode = toText(*m_node);
504
505    if (m_fullyClippedStack.top() && !(m_behavior & TextIteratorIgnoresStyleVisibility))
506        return false;
507
508    auto& renderer = *textNode.renderer();
509
510    m_lastTextNode = &textNode;
511    String rendererText = renderer.text();
512
513    // handle pre-formatted text
514    if (!renderer.style().collapseWhiteSpace()) {
515        int runStart = m_offset;
516        if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) {
517            emitCharacter(' ', textNode, nullptr, runStart, runStart);
518            return false;
519        }
520        if (!m_handledFirstLetter && renderer.isTextFragment() && !m_offset) {
521            handleTextNodeFirstLetter(toRenderTextFragment(renderer));
522            if (m_firstLetterText) {
523                String firstLetter = m_firstLetterText->text();
524                emitText(textNode, *m_firstLetterText, m_offset, m_offset + firstLetter.length());
525                m_firstLetterText = nullptr;
526                m_textBox = nullptr;
527                return false;
528            }
529        }
530        if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility))
531            return false;
532        int rendererTextLength = rendererText.length();
533        int end = (&textNode == m_endContainer) ? m_endOffset : INT_MAX;
534        int runEnd = std::min(rendererTextLength, end);
535
536        if (runStart >= runEnd)
537            return true;
538
539        emitText(textNode, renderer, runStart, runEnd);
540        return true;
541    }
542
543    if (renderer.simpleLineLayout()) {
544        if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility))
545            return true;
546        // This code aims to produce same results as handleTextBox() below so test results don't change. It does not make much logical sense.
547        unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : rendererText.length();
548        unsigned runEnd = m_offset;
549        unsigned runStart = m_offset;
550        while (runEnd < end && (renderer.style().isCollapsibleWhiteSpace(rendererText[runEnd]) || rendererText[runEnd] == '\t'))
551            ++runEnd;
552        bool addSpaceForPrevious = m_lastTextNodeEndedWithCollapsedSpace && m_lastCharacter && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter);
553        if (runEnd > runStart || addSpaceForPrevious) {
554            if (runEnd == rendererText.length()) {
555                m_lastTextNodeEndedWithCollapsedSpace = true;
556                return true;
557            }
558            bool addSpaceForCurrent = runStart || (m_lastCharacter && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter));
559            if (addSpaceForCurrent || addSpaceForPrevious) {
560                emitCharacter(' ', textNode, nullptr, runStart, runEnd);
561                m_offset = runEnd;
562                return false;
563            }
564            runStart = runEnd;
565        }
566        while (runEnd < end && !renderer.style().isCollapsibleWhiteSpace(rendererText[runEnd]))
567            ++runEnd;
568        if (runStart < end)
569            emitText(textNode, renderer, runStart, runEnd);
570        m_offset = runEnd;
571        return runEnd == end;
572    }
573
574    if (renderer.firstTextBox())
575        m_textBox = renderer.firstTextBox();
576
577    bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer.isTextFragment() && !m_offset;
578    if (shouldHandleFirstLetter)
579        handleTextNodeFirstLetter(toRenderTextFragment(renderer));
580
581    if (!renderer.firstTextBox() && rendererText.length() && !shouldHandleFirstLetter) {
582        if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility))
583            return false;
584        m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space
585        return true;
586    }
587
588    // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text)
589    auto& boxesRenderer = m_firstLetterText ? *m_firstLetterText : renderer;
590    if (boxesRenderer.containsReversedText()) {
591        m_sortedTextBoxes.clear();
592        for (InlineTextBox* textBox = boxesRenderer.firstTextBox(); textBox; textBox = textBox->nextTextBox())
593            m_sortedTextBoxes.append(textBox);
594        std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart);
595        m_sortedTextBoxesPosition = 0;
596        m_textBox = m_sortedTextBoxes.isEmpty() ? nullptr : m_sortedTextBoxes[0];
597    }
598
599    handleTextBox();
600    return true;
601}
602
603void TextIterator::handleTextBox()
604{
605    Text& textNode = toText(*m_node);
606
607    auto& renderer = m_firstLetterText ? *m_firstLetterText : *textNode.renderer();
608    if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) {
609        m_textBox = nullptr;
610        return;
611    }
612    String rendererText = renderer.text();
613    unsigned start = m_offset;
614    unsigned end = (&textNode == m_endContainer) ? static_cast<unsigned>(m_endOffset) : UINT_MAX;
615    while (m_textBox) {
616        unsigned textBoxStart = m_textBox->start();
617        unsigned runStart = std::max(textBoxStart, start);
618
619        // Check for collapsed space at the start of this run.
620        InlineTextBox* firstTextBox = renderer.containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? nullptr : m_sortedTextBoxes[0]) : renderer.firstTextBox();
621        bool needSpace = m_lastTextNodeEndedWithCollapsedSpace || (m_textBox == firstTextBox && textBoxStart == runStart && runStart);
622        if (needSpace && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) {
623            if (m_lastTextNode == &textNode && runStart && rendererText[runStart - 1] == ' ') {
624                unsigned spaceRunStart = runStart - 1;
625                while (spaceRunStart && rendererText[spaceRunStart - 1] == ' ')
626                    --spaceRunStart;
627                emitText(textNode, renderer, spaceRunStart, spaceRunStart + 1);
628            } else
629                emitCharacter(' ', textNode, nullptr, runStart, runStart);
630            return;
631        }
632        unsigned textBoxEnd = textBoxStart + m_textBox->len();
633        unsigned runEnd = std::min(textBoxEnd, end);
634
635        // Determine what the next text box will be, but don't advance yet
636        InlineTextBox* nextTextBox = nullptr;
637        if (renderer.containsReversedText()) {
638            if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size())
639                nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1];
640        } else
641            nextTextBox = m_textBox->nextTextBox();
642        ASSERT(!nextTextBox || &nextTextBox->renderer() == &renderer);
643
644        if (runStart < runEnd) {
645            // Handle either a single newline character (which becomes a space),
646            // or a run of characters that does not include a newline.
647            // This effectively translates newlines to spaces without copying the text.
648            if (rendererText[runStart] == '\n') {
649                emitCharacter(' ', textNode, nullptr, runStart, runStart + 1);
650                m_offset = runStart + 1;
651            } else {
652                size_t subrunEnd = rendererText.find('\n', runStart);
653                if (subrunEnd == notFound || subrunEnd > runEnd) {
654                    subrunEnd = runEnd;
655                    bool lastSpaceCollapsedByNextNonTextBox = !nextTextBox && (m_behavior & TextIteratorBehavesAsIfNodesFollowing) && rendererText.length() > runEnd;
656                    if (lastSpaceCollapsedByNextNonTextBox)
657                        ++subrunEnd; // runEnd stopped before last space. Increment by one to restore the space.
658                }
659                m_offset = subrunEnd;
660                emitText(textNode, renderer, runStart, subrunEnd);
661            }
662
663            // If we are doing a subrun that doesn't go to the end of the text box,
664            // come back again to finish handling this text box; don't advance to the next one.
665            if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd)
666                return;
667
668            // Advance and return
669            unsigned nextRunStart = nextTextBox ? nextTextBox->start() : rendererText.length();
670            if (nextRunStart > runEnd)
671                m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end
672            m_textBox = nextTextBox;
673            if (renderer.containsReversedText())
674                ++m_sortedTextBoxesPosition;
675            return;
676        }
677        // Advance and continue
678        m_textBox = nextTextBox;
679        if (renderer.containsReversedText())
680            ++m_sortedTextBoxesPosition;
681    }
682    if (!m_textBox && m_remainingTextBox) {
683        m_textBox = m_remainingTextBox;
684        m_remainingTextBox = nullptr;
685        m_firstLetterText = nullptr;
686        m_offset = 0;
687        handleTextBox();
688    }
689}
690
691static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter)
692{
693    if (!firstLetter)
694        return nullptr;
695
696    // FIXME: Should this check descendent objects?
697    return childrenOfType<RenderText>(*firstLetter).first();
698}
699
700void TextIterator::handleTextNodeFirstLetter(RenderTextFragment& renderer)
701{
702    if (auto* firstLetter = renderer.firstLetter()) {
703        if (firstLetter->style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility))
704            return;
705        if (auto* firstLetterText = firstRenderTextInFirstLetter(firstLetter)) {
706            m_handledFirstLetter = true;
707            m_remainingTextBox = m_textBox;
708            m_textBox = firstLetterText->firstTextBox();
709            m_sortedTextBoxes.clear();
710            m_firstLetterText = firstLetterText;
711        }
712    }
713    m_handledFirstLetter = true;
714}
715
716bool TextIterator::handleReplacedElement()
717{
718    if (m_fullyClippedStack.top())
719        return false;
720
721    auto& renderer = *m_node->renderer();
722    if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility))
723        return false;
724
725    if (m_lastTextNodeEndedWithCollapsedSpace) {
726        emitCharacter(' ', *m_lastTextNode->parentNode(), m_lastTextNode, 1, 1);
727        return false;
728    }
729
730    if ((m_behavior & TextIteratorEntersTextControls) && renderer.isTextControl()) {
731        if (TextControlInnerTextElement* innerTextElement = toRenderTextControl(renderer).textFormControlElement().innerTextElement()) {
732            m_node = innerTextElement->containingShadowRoot();
733            pushFullyClippedState(m_fullyClippedStack, *m_node);
734            m_offset = 0;
735            return false;
736        }
737    }
738
739    m_hasEmitted = true;
740
741    if ((m_behavior & TextIteratorEmitsObjectReplacementCharacters) && renderer.isReplaced()) {
742        emitCharacter(objectReplacementCharacter, *m_node->parentNode(), m_node, 0, 1);
743        // Don't process subtrees for embedded objects. If the text there is required,
744        // it must be explicitly asked by specifying a range falling inside its boundaries.
745        m_handledChildren = true;
746        return true;
747    }
748
749    if (m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) {
750        // We want replaced elements to behave like punctuation for boundary
751        // finding, and to simply take up space for the selection preservation
752        // code in moveParagraphs, so we use a comma.
753        emitCharacter(',', *m_node->parentNode(), m_node, 0, 1);
754        return true;
755    }
756
757    m_positionNode = m_node->parentNode();
758    m_positionOffsetBaseNode = m_node;
759    m_positionStartOffset = 0;
760    m_positionEndOffset = 1;
761
762    if ((m_behavior & TextIteratorEmitsImageAltText) && renderer.isRenderImage()) {
763        String altText = toRenderImage(renderer).altText();
764        if (unsigned length = altText.length()) {
765            m_lastCharacter = altText[length - 1];
766            m_copyableText.set(WTF::move(altText));
767            m_text = m_copyableText.text();
768            return true;
769        }
770    }
771
772    m_copyableText.reset();
773    m_text = StringView();
774    m_lastCharacter = 0;
775    return true;
776}
777
778static bool shouldEmitTabBeforeNode(Node& node)
779{
780    auto* renderer = node.renderer();
781
782    // Table cells are delimited by tabs.
783    if (!renderer || !isTableCell(&node))
784        return false;
785
786    // Want a tab before every cell other than the first one.
787    RenderTableCell& cell = toRenderTableCell(*renderer);
788    RenderTable* table = cell.table();
789    return table && (table->cellBefore(&cell) || table->cellAbove(&cell));
790}
791
792static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText)
793{
794    auto* renderer = node->renderer();
795    if (!(renderer ? renderer->isBR() : node->hasTagName(brTag)))
796        return false;
797    return emitsOriginalText || !(node->isInShadowTree() && node->shadowHost()->toInputElement());
798}
799
800static bool hasHeaderTag(HTMLElement& element)
801{
802    return element.hasTagName(h1Tag)
803        || element.hasTagName(h2Tag)
804        || element.hasTagName(h3Tag)
805        || element.hasTagName(h4Tag)
806        || element.hasTagName(h5Tag)
807        || element.hasTagName(h6Tag);
808}
809
810static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node)
811{
812    // Block flow (versus inline flow) is represented by having
813    // a newline both before and after the element.
814    auto* renderer = node.renderer();
815    if (!renderer) {
816        if (!node.isHTMLElement())
817            return false;
818        auto& element = toHTMLElement(node);
819        return hasHeaderTag(element)
820            || element.hasTagName(blockquoteTag)
821            || element.hasTagName(ddTag)
822            || element.hasTagName(divTag)
823            || element.hasTagName(dlTag)
824            || element.hasTagName(dtTag)
825            || element.hasTagName(hrTag)
826            || element.hasTagName(liTag)
827            || element.hasTagName(listingTag)
828            || element.hasTagName(olTag)
829            || element.hasTagName(pTag)
830            || element.hasTagName(preTag)
831            || element.hasTagName(trTag)
832            || element.hasTagName(ulTag);
833    }
834
835    // Need to make an exception for table cells, because they are blocks, but we
836    // want them tab-delimited rather than having newlines before and after.
837    if (isTableCell(&node))
838        return false;
839
840    // Need to make an exception for table row elements, because they are neither
841    // "inline" or "RenderBlock", but we want newlines for them.
842    if (renderer->isTableRow()) {
843        RenderTable* table = toRenderTableRow(*renderer).table();
844        if (table && !table->isInline())
845            return true;
846    }
847
848    return !renderer->isInline()
849        && renderer->isRenderBlock()
850        && !renderer->isFloatingOrOutOfFlowPositioned()
851        && !renderer->isBody()
852        && !renderer->isRubyText();
853}
854
855static bool shouldEmitNewlineAfterNode(Node& node)
856{
857    // FIXME: It should be better but slower to create a VisiblePosition here.
858    if (!shouldEmitNewlinesBeforeAndAfterNode(node))
859        return false;
860    // Check if this is the very last renderer in the document.
861    // If so, then we should not emit a newline.
862    Node* subsequentNode = &node;
863    while ((subsequentNode = NodeTraversal::nextSkippingChildren(subsequentNode))) {
864        if (subsequentNode->renderer())
865            return true;
866    }
867    return false;
868}
869
870static bool shouldEmitNewlineBeforeNode(Node& node)
871{
872    return shouldEmitNewlinesBeforeAndAfterNode(node);
873}
874
875static bool shouldEmitExtraNewlineForNode(Node& node)
876{
877    // When there is a significant collapsed bottom margin, emit an extra
878    // newline for a more realistic result. We end up getting the right
879    // result even without margin collapsing. For example: <div><p>text</p></div>
880    // will work right even if both the <div> and the <p> have bottom margins.
881
882    auto* renderer = node.renderer();
883    if (!renderer || !renderer->isBox())
884        return false;
885
886    // NOTE: We only do this for a select set of nodes, and WinIE appears not to do this at all.
887    if (!node.isHTMLElement())
888        return false;
889    if (!(hasHeaderTag(toHTMLElement(node)) || toHTMLElement(node).hasTagName(pTag)))
890        return false;
891
892    int bottomMargin = toRenderBox(renderer)->collapsedMarginAfter();
893    int fontSize = toRenderBox(renderer)->style().fontDescription().computedPixelSize();
894    return bottomMargin * 2 >= fontSize;
895}
896
897static int collapsedSpaceLength(RenderText& renderer, int textEnd)
898{
899    StringImpl& text = *renderer.text();
900    unsigned length = text.length();
901    for (unsigned i = textEnd; i < length; ++i) {
902        if (!renderer.style().isCollapsibleWhiteSpace(text[i]))
903            return i - textEnd;
904    }
905    return length - textEnd;
906}
907
908static int maxOffsetIncludingCollapsedSpaces(Node& node)
909{
910    int offset = caretMaxOffset(&node);
911    if (auto* renderer = node.renderer()) {
912        if (renderer->isText())
913            offset += collapsedSpaceLength(toRenderText(*renderer), offset);
914    }
915    return offset;
916}
917
918// Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic).
919bool TextIterator::shouldRepresentNodeOffsetZero()
920{
921    if ((m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) && m_node->renderer() && m_node->renderer()->isTable())
922        return true;
923
924    // Leave element positioned flush with start of a paragraph
925    // (e.g. do not insert tab before a table cell at the start of a paragraph)
926    if (m_lastCharacter == '\n')
927        return false;
928
929    // Otherwise, show the position if we have emitted any characters
930    if (m_hasEmitted)
931        return true;
932
933    // We've not emitted anything yet. Generally, there is no need for any positioning then.
934    // The only exception is when the element is visually not in the same line as
935    // the start of the range (e.g. the range starts at the end of the previous paragraph).
936    // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we
937    // make quicker checks to possibly avoid that. Another check that we could make is
938    // is whether the inline vs block flow changed since the previous visible element.
939    // I think we're already in a special enough case that that won't be needed, tho.
940
941    // No character needed if this is the first node in the range.
942    if (m_node == m_startContainer)
943        return false;
944
945    // If we are outside the start container's subtree, assume we need to emit.
946    // FIXME: m_startContainer could be an inline block
947    if (!m_node->isDescendantOf(m_startContainer))
948        return true;
949
950    // If we started as m_startContainer offset 0 and the current node is a descendant of
951    // the start container, we already had enough context to correctly decide whether to
952    // emit after a preceding block. We chose not to emit (m_hasEmitted is false),
953    // so don't second guess that now.
954    // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably
955    // immaterial since we likely would have already emitted something by now.
956    if (m_startOffset == 0)
957        return false;
958
959    // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning.
960    // Additionally, if the range we are iterating over contains huge sections of unrendered content,
961    // we would create VisiblePositions on every call to this function without this check.
962    if (!m_node->renderer() || m_node->renderer()->style().visibility() != VISIBLE
963        || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !m_node->hasTagName(bodyTag)))
964        return false;
965
966    // The startPos.isNotNull() check is needed because the start could be before the body,
967    // and in that case we'll get null. We don't want to put in newlines at the start in that case.
968    // The currPos.isNotNull() check is needed because positions in non-HTML content
969    // (like SVG) do not have visible positions, and we don't want to emit for them either.
970    VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM);
971    VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM);
972    return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos);
973}
974
975bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node& node)
976{
977    return node.renderer() && node.renderer()->isTable() && (node.renderer()->isInline() || (m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions));
978}
979
980void TextIterator::representNodeOffsetZero()
981{
982    // Emit a character to show the positioning of m_node.
983
984    // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can
985    // create VisiblePositions, which is expensive. So, we perform the inexpensive checks
986    // on m_node to see if it necessitates emitting a character first and will early return
987    // before encountering shouldRepresentNodeOffsetZero()s worse case behavior.
988    if (shouldEmitTabBeforeNode(*m_node)) {
989        if (shouldRepresentNodeOffsetZero())
990            emitCharacter('\t', *m_node->parentNode(), m_node, 0, 0);
991    } else if (shouldEmitNewlineBeforeNode(*m_node)) {
992        if (shouldRepresentNodeOffsetZero())
993            emitCharacter('\n', *m_node->parentNode(), m_node, 0, 0);
994    } else if (shouldEmitSpaceBeforeAndAfterNode(*m_node)) {
995        if (shouldRepresentNodeOffsetZero())
996            emitCharacter(' ', *m_node->parentNode(), m_node, 0, 0);
997    }
998}
999
1000bool TextIterator::handleNonTextNode()
1001{
1002    if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText))
1003        emitCharacter('\n', *m_node->parentNode(), m_node, 0, 1);
1004    else if ((m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) && m_node->renderer() && m_node->renderer()->isHR())
1005        emitCharacter(' ', *m_node->parentNode(), m_node, 0, 1);
1006    else
1007        representNodeOffsetZero();
1008
1009    return true;
1010}
1011
1012void TextIterator::exitNode()
1013{
1014    // prevent emitting a newline when exiting a collapsed block at beginning of the range
1015    // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could
1016    // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and
1017    // therefore look like a blank line.
1018    if (!m_hasEmitted)
1019        return;
1020
1021    // Emit with a position *inside* m_node, after m_node's contents, in
1022    // case it is a block, because the run should start where the
1023    // emitted character is positioned visually.
1024    Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node;
1025    // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making
1026    // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use
1027    // TextIterator in _web_attributedStringFromRange.
1028    // See <rdar://problem/5428427> for an example of how this mismatch will cause problems.
1029    if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) {
1030        // use extra newline to represent margin bottom, as needed
1031        bool addNewline = shouldEmitExtraNewlineForNode(*m_node);
1032
1033        // FIXME: We need to emit a '\n' as we leave an empty block(s) that
1034        // contain a VisiblePosition when doing selection preservation.
1035        if (m_lastCharacter != '\n') {
1036            // insert a newline with a position following this block's contents.
1037            emitCharacter('\n', *baseNode->parentNode(), baseNode, 1, 1);
1038            // remember whether to later add a newline for the current node
1039            ASSERT(!m_needsAnotherNewline);
1040            m_needsAnotherNewline = addNewline;
1041        } else if (addNewline)
1042            // insert a newline with a position following this block's contents.
1043            emitCharacter('\n', *baseNode->parentNode(), baseNode, 1, 1);
1044    }
1045
1046    // If nothing was emitted, see if we need to emit a space.
1047    if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(*m_node))
1048        emitCharacter(' ', *baseNode->parentNode(), baseNode, 1, 1);
1049}
1050
1051void TextIterator::emitCharacter(UChar character, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset)
1052{
1053    m_hasEmitted = true;
1054
1055    // remember information with which to construct the TextIterator::range()
1056    m_positionNode = &characterNode;
1057    m_positionOffsetBaseNode = offsetBaseNode;
1058    m_positionStartOffset = textStartOffset;
1059    m_positionEndOffset = textEndOffset;
1060
1061    m_copyableText.set(character);
1062    m_text = m_copyableText.text();
1063    m_lastCharacter = character;
1064    m_lastTextNodeEndedWithCollapsedSpace = false;
1065}
1066
1067void TextIterator::emitText(Text& textNode, RenderText& renderer, int textStartOffset, int textEndOffset)
1068{
1069    ASSERT(textStartOffset >= 0);
1070    ASSERT(textEndOffset >= 0);
1071    ASSERT(textStartOffset <= textEndOffset);
1072
1073    // FIXME: This probably yields the wrong offsets when text-transform: lowercase turns a single character into two characters.
1074    String string = (m_behavior & TextIteratorEmitsOriginalText) ? renderer.originalText()
1075        : ((m_behavior & TextIteratorEmitsTextsWithoutTranscoding) ? renderer.textWithoutConvertingBackslashToYenSymbol() : renderer.text());
1076
1077    ASSERT(string.length() >= static_cast<unsigned>(textEndOffset));
1078
1079    m_positionNode = &textNode;
1080    m_positionOffsetBaseNode = nullptr;
1081    m_positionStartOffset = textStartOffset;
1082    m_positionEndOffset = textEndOffset;
1083
1084    m_lastCharacter = string[textEndOffset - 1];
1085    m_copyableText.set(WTF::move(string), textStartOffset, textEndOffset - textStartOffset);
1086    m_text = m_copyableText.text();
1087
1088    m_lastTextNodeEndedWithCollapsedSpace = false;
1089    m_hasEmitted = true;
1090}
1091
1092PassRefPtr<Range> TextIterator::range() const
1093{
1094    ASSERT(!atEnd());
1095
1096    // use the current run information, if we have it
1097    if (m_positionOffsetBaseNode) {
1098        int index = m_positionOffsetBaseNode->nodeIndex();
1099        m_positionStartOffset += index;
1100        m_positionEndOffset += index;
1101        m_positionOffsetBaseNode = nullptr;
1102    }
1103    return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1104}
1105
1106Node* TextIterator::node() const
1107{
1108    RefPtr<Range> textRange = range();
1109
1110    Node* node = textRange->startContainer();
1111    if (node->offsetInCharacters())
1112        return node;
1113
1114    return node->childNode(textRange->startOffset());
1115}
1116
1117// --------
1118
1119SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range& range, TextIteratorBehavior behavior)
1120    : m_behavior(behavior)
1121    , m_node(nullptr)
1122    , m_offset(0)
1123    , m_handledNode(false)
1124    , m_handledChildren(false)
1125    , m_startContainer(nullptr)
1126    , m_startOffset(0)
1127    , m_endContainer(nullptr)
1128    , m_endOffset(0)
1129    , m_positionNode(nullptr)
1130    , m_positionStartOffset(0)
1131    , m_positionEndOffset(0)
1132    , m_lastTextNode(nullptr)
1133    , m_lastCharacter(0)
1134    , m_havePassedStartContainer(false)
1135    , m_shouldHandleFirstLetter(false)
1136{
1137    ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls);
1138
1139    range.ownerDocument().updateLayoutIgnorePendingStylesheets();
1140
1141    Node* startNode = range.startContainer();
1142    if (!startNode)
1143        return;
1144    Node* endNode = range.endContainer();
1145    int startOffset = range.startOffset();
1146    int endOffset = range.endOffset();
1147
1148    if (!startNode->offsetInCharacters()) {
1149        if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) {
1150            startNode = startNode->childNode(startOffset);
1151            startOffset = 0;
1152        }
1153    }
1154    if (!endNode->offsetInCharacters()) {
1155        if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) {
1156            endNode = endNode->childNode(endOffset - 1);
1157            endOffset = lastOffsetInNode(endNode);
1158        }
1159    }
1160
1161    m_node = endNode;
1162    setUpFullyClippedStack(m_fullyClippedStack, *m_node);
1163    m_offset = endOffset;
1164    m_handledNode = false;
1165    m_handledChildren = endOffset == 0;
1166
1167    m_startContainer = startNode;
1168    m_startOffset = startOffset;
1169    m_endContainer = endNode;
1170    m_endOffset = endOffset;
1171
1172#ifndef NDEBUG
1173    // Need this just because of the assert.
1174    m_positionNode = endNode;
1175#endif
1176
1177    m_lastTextNode = nullptr;
1178    m_lastCharacter = '\n';
1179
1180    m_havePassedStartContainer = false;
1181
1182    advance();
1183}
1184
1185void SimplifiedBackwardsTextIterator::advance()
1186{
1187    ASSERT(!atEnd());
1188
1189    m_positionNode = nullptr;
1190    m_copyableText.reset();
1191    m_text = StringView();
1192
1193    if ((m_behavior & TextIteratorStopsOnFormControls) && HTMLFormControlElement::enclosingFormControlElement(m_node))
1194        return;
1195
1196    while (m_node && !m_havePassedStartContainer) {
1197        // Don't handle node if we start iterating at [node, 0].
1198        if (!m_handledNode && !(m_node == m_endContainer && !m_endOffset)) {
1199            auto* renderer = m_node->renderer();
1200            if (renderer && renderer->isText() && m_node->isTextNode()) {
1201                if (renderer->style().visibility() == VISIBLE && m_offset > 0)
1202                    m_handledNode = handleTextNode();
1203            } else if (renderer && (renderer->isImage() || renderer->isWidget())) {
1204                if (renderer->style().visibility() == VISIBLE && m_offset > 0)
1205                    m_handledNode = handleReplacedElement();
1206            } else
1207                m_handledNode = handleNonTextNode();
1208            if (m_positionNode)
1209                return;
1210        }
1211
1212        if (!m_handledChildren && m_node->hasChildNodes()) {
1213            m_node = m_node->lastChild();
1214            pushFullyClippedState(m_fullyClippedStack, *m_node);
1215        } else {
1216            // Exit empty containers as we pass over them or containers
1217            // where [container, 0] is where we started iterating.
1218            if (!m_handledNode
1219                    && canHaveChildrenForEditing(m_node)
1220                    && m_node->parentNode()
1221                    && (!m_node->lastChild() || (m_node == m_endContainer && !m_endOffset))) {
1222                exitNode();
1223                if (m_positionNode) {
1224                    m_handledNode = true;
1225                    m_handledChildren = true;
1226                    return;
1227                }
1228            }
1229
1230            // Exit all other containers.
1231            while (!m_node->previousSibling()) {
1232                if (!advanceRespectingRange(m_node->parentOrShadowHostNode()))
1233                    break;
1234                m_fullyClippedStack.pop();
1235                exitNode();
1236                if (m_positionNode) {
1237                    m_handledNode = true;
1238                    m_handledChildren = true;
1239                    return;
1240                }
1241            }
1242
1243            m_fullyClippedStack.pop();
1244            if (advanceRespectingRange(m_node->previousSibling()))
1245                pushFullyClippedState(m_fullyClippedStack, *m_node);
1246            else
1247                m_node = nullptr;
1248        }
1249
1250        // For the purpose of word boundary detection,
1251        // we should iterate all visible text and trailing (collapsed) whitespaces.
1252        m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(*m_node) : 0;
1253        m_handledNode = false;
1254        m_handledChildren = false;
1255
1256        if (m_positionNode)
1257            return;
1258    }
1259}
1260
1261bool SimplifiedBackwardsTextIterator::handleTextNode()
1262{
1263    Text& textNode = toText(*m_node);
1264
1265    m_lastTextNode = &textNode;
1266
1267    int startOffset;
1268    int offsetInNode;
1269    RenderText* renderer = handleFirstLetter(startOffset, offsetInNode);
1270    if (!renderer)
1271        return true;
1272
1273    String text = renderer->text();
1274    if (!renderer->hasRenderedText() && text.length())
1275        return true;
1276
1277    if (startOffset + offsetInNode == m_offset) {
1278        ASSERT(!m_shouldHandleFirstLetter);
1279        return true;
1280    }
1281
1282    m_positionEndOffset = m_offset;
1283    m_offset = startOffset + offsetInNode;
1284    m_positionNode = m_node;
1285    m_positionStartOffset = m_offset;
1286
1287    ASSERT(m_positionStartOffset < m_positionEndOffset);
1288    ASSERT(m_positionStartOffset - offsetInNode >= 0);
1289    ASSERT(m_positionEndOffset - offsetInNode > 0);
1290    ASSERT(m_positionEndOffset - offsetInNode <= static_cast<int>(text.length()));
1291
1292    m_lastCharacter = text[m_positionEndOffset - offsetInNode - 1];
1293    m_copyableText.set(WTF::move(text), m_positionStartOffset - offsetInNode, m_positionEndOffset - m_positionStartOffset);
1294    m_text = m_copyableText.text();
1295
1296    return !m_shouldHandleFirstLetter;
1297}
1298
1299RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode)
1300{
1301    RenderText* renderer = toRenderText(m_node->renderer());
1302    startOffset = (m_node == m_startContainer) ? m_startOffset : 0;
1303
1304    if (!renderer->isTextFragment()) {
1305        offsetInNode = 0;
1306        return renderer;
1307    }
1308
1309    RenderTextFragment* fragment = toRenderTextFragment(renderer);
1310    int offsetAfterFirstLetter = fragment->start();
1311    if (startOffset >= offsetAfterFirstLetter) {
1312        ASSERT(!m_shouldHandleFirstLetter);
1313        offsetInNode = offsetAfterFirstLetter;
1314        return renderer;
1315    }
1316
1317    if (!m_shouldHandleFirstLetter && startOffset + offsetAfterFirstLetter < m_offset) {
1318        m_shouldHandleFirstLetter = true;
1319        offsetInNode = offsetAfterFirstLetter;
1320        return renderer;
1321    }
1322
1323    m_shouldHandleFirstLetter = false;
1324    offsetInNode = 0;
1325    return firstRenderTextInFirstLetter(fragment->firstLetter());
1326}
1327
1328bool SimplifiedBackwardsTextIterator::handleReplacedElement()
1329{
1330    unsigned index = m_node->nodeIndex();
1331    // We want replaced elements to behave like punctuation for boundary
1332    // finding, and to simply take up space for the selection preservation
1333    // code in moveParagraphs, so we use a comma. Unconditionally emit
1334    // here because this iterator is only used for boundary finding.
1335    emitCharacter(',', *m_node->parentNode(), index, index + 1);
1336    return true;
1337}
1338
1339bool SimplifiedBackwardsTextIterator::handleNonTextNode()
1340{
1341    // We can use a linefeed in place of a tab because this simple iterator is only used to
1342    // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs.
1343    if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(*m_node)) {
1344        unsigned index = m_node->nodeIndex();
1345        // The start of this emitted range is wrong. Ensuring correctness would require
1346        // VisiblePositions and so would be slow. previousBoundary expects this.
1347        emitCharacter('\n', *m_node->parentNode(), index + 1, index + 1);
1348    }
1349    return true;
1350}
1351
1352void SimplifiedBackwardsTextIterator::exitNode()
1353{
1354    if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(*m_node)) {
1355        // The start of this emitted range is wrong. Ensuring correctness would require
1356        // VisiblePositions and so would be slow. previousBoundary expects this.
1357        emitCharacter('\n', *m_node, 0, 0);
1358    }
1359}
1360
1361void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node& node, int startOffset, int endOffset)
1362{
1363    m_positionNode = &node;
1364    m_positionStartOffset = startOffset;
1365    m_positionEndOffset = endOffset;
1366    m_copyableText.set(c);
1367    m_text = m_copyableText.text();
1368    m_lastCharacter = c;
1369}
1370
1371bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next)
1372{
1373    if (!next)
1374        return false;
1375    m_havePassedStartContainer |= m_node == m_startContainer;
1376    if (m_havePassedStartContainer)
1377        return false;
1378    m_node = next;
1379    return true;
1380}
1381
1382PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const
1383{
1384    ASSERT(!atEnd());
1385
1386    return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset);
1387}
1388
1389// --------
1390
1391CharacterIterator::CharacterIterator(const Range& range, TextIteratorBehavior behavior)
1392    : m_underlyingIterator(&range, behavior)
1393    , m_offset(0)
1394    , m_runOffset(0)
1395    , m_atBreak(true)
1396{
1397    while (!atEnd() && !m_underlyingIterator.text().length())
1398        m_underlyingIterator.advance();
1399}
1400
1401PassRefPtr<Range> CharacterIterator::range() const
1402{
1403    RefPtr<Range> r = m_underlyingIterator.range();
1404    if (!m_underlyingIterator.atEnd()) {
1405        if (m_underlyingIterator.text().length() <= 1) {
1406            ASSERT(m_runOffset == 0);
1407        } else {
1408            Node* n = r->startContainer();
1409            ASSERT(n == r->endContainer());
1410            int offset = r->startOffset() + m_runOffset;
1411            r->setStart(n, offset);
1412            r->setEnd(n, offset + 1);
1413        }
1414    }
1415    return r.release();
1416}
1417
1418void CharacterIterator::advance(int count)
1419{
1420    if (count <= 0) {
1421        ASSERT(count == 0);
1422        return;
1423    }
1424
1425    m_atBreak = false;
1426
1427    // easy if there is enough left in the current m_underlyingIterator run
1428    int remaining = m_underlyingIterator.text().length() - m_runOffset;
1429    if (count < remaining) {
1430        m_runOffset += count;
1431        m_offset += count;
1432        return;
1433    }
1434
1435    // exhaust the current m_underlyingIterator run
1436    count -= remaining;
1437    m_offset += remaining;
1438
1439    // move to a subsequent m_underlyingIterator run
1440    for (m_underlyingIterator.advance(); !atEnd(); m_underlyingIterator.advance()) {
1441        int runLength = m_underlyingIterator.text().length();
1442        if (!runLength)
1443            m_atBreak = true;
1444        else {
1445            // see whether this is m_underlyingIterator to use
1446            if (count < runLength) {
1447                m_runOffset = count;
1448                m_offset += count;
1449                return;
1450            }
1451
1452            // exhaust this m_underlyingIterator run
1453            count -= runLength;
1454            m_offset += runLength;
1455        }
1456    }
1457
1458    // ran to the end of the m_underlyingIterator... no more runs left
1459    m_atBreak = true;
1460    m_runOffset = 0;
1461}
1462
1463static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length)
1464{
1465    it.advance(offset);
1466    RefPtr<Range> start = it.range();
1467
1468    if (length > 1)
1469        it.advance(length - 1);
1470    RefPtr<Range> end = it.range();
1471
1472    return Range::create(start->startContainer()->document(),
1473        start->startContainer(), start->startOffset(),
1474        end->endContainer(), end->endOffset());
1475}
1476
1477BackwardsCharacterIterator::BackwardsCharacterIterator(const Range& range)
1478    : m_underlyingIterator(range, TextIteratorDefaultBehavior)
1479    , m_offset(0)
1480    , m_runOffset(0)
1481    , m_atBreak(true)
1482{
1483    while (!atEnd() && !m_underlyingIterator.text().length())
1484        m_underlyingIterator.advance();
1485}
1486
1487PassRefPtr<Range> BackwardsCharacterIterator::range() const
1488{
1489    RefPtr<Range> r = m_underlyingIterator.range();
1490    if (!m_underlyingIterator.atEnd()) {
1491        if (m_underlyingIterator.text().length() <= 1)
1492            ASSERT(m_runOffset == 0);
1493        else {
1494            Node* n = r->startContainer();
1495            ASSERT(n == r->endContainer());
1496            int offset = r->endOffset() - m_runOffset;
1497            r->setStart(n, offset - 1);
1498            r->setEnd(n, offset);
1499        }
1500    }
1501    return r.release();
1502}
1503
1504void BackwardsCharacterIterator::advance(int count)
1505{
1506    if (count <= 0) {
1507        ASSERT(!count);
1508        return;
1509    }
1510
1511    m_atBreak = false;
1512
1513    int remaining = m_underlyingIterator.text().length() - m_runOffset;
1514    if (count < remaining) {
1515        m_runOffset += count;
1516        m_offset += count;
1517        return;
1518    }
1519
1520    count -= remaining;
1521    m_offset += remaining;
1522
1523    for (m_underlyingIterator.advance(); !atEnd(); m_underlyingIterator.advance()) {
1524        int runLength = m_underlyingIterator.text().length();
1525        if (runLength == 0)
1526            m_atBreak = true;
1527        else {
1528            if (count < runLength) {
1529                m_runOffset = count;
1530                m_offset += count;
1531                return;
1532            }
1533
1534            count -= runLength;
1535            m_offset += runLength;
1536        }
1537    }
1538
1539    m_atBreak = true;
1540    m_runOffset = 0;
1541}
1542
1543// --------
1544
1545WordAwareIterator::WordAwareIterator(const Range& range)
1546    : m_underlyingIterator(&range)
1547    , m_didLookAhead(true) // so we consider the first chunk from the text iterator
1548{
1549    advance(); // get in position over the first chunk of text
1550}
1551
1552// We're always in one of these modes:
1553// - The current chunk in the text iterator is our current chunk
1554//      (typically its a piece of whitespace, or text that ended with whitespace)
1555// - The previous chunk in the text iterator is our current chunk
1556//      (we looked ahead to the next chunk and found a word boundary)
1557// - We built up our own chunk of text from many chunks from the text iterator
1558
1559// FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries.
1560
1561void WordAwareIterator::advance()
1562{
1563    m_previousText.reset();
1564    m_buffer.clear();
1565
1566    // If last time we did a look-ahead, start with that looked-ahead chunk now
1567    if (!m_didLookAhead) {
1568        ASSERT(!m_underlyingIterator.atEnd());
1569        m_underlyingIterator.advance();
1570    }
1571    m_didLookAhead = false;
1572
1573    // Go to next non-empty chunk
1574    while (!m_underlyingIterator.atEnd() && !m_underlyingIterator.text().length())
1575        m_underlyingIterator.advance();
1576    if (m_underlyingIterator.atEnd())
1577        return;
1578
1579    while (1) {
1580        // If this chunk ends in whitespace we can just use it as our chunk.
1581        if (isSpaceOrNewline(m_underlyingIterator.text()[m_underlyingIterator.text().length() - 1]))
1582            return;
1583
1584        // If this is the first chunk that failed, save it in previousText before look ahead
1585        if (m_buffer.isEmpty())
1586            m_previousText = m_underlyingIterator.copyableText();
1587
1588        // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff
1589        m_underlyingIterator.advance();
1590        if (m_underlyingIterator.atEnd() || !m_underlyingIterator.text().length() || isSpaceOrNewline(m_underlyingIterator.text()[0])) {
1591            m_didLookAhead = true;
1592            return;
1593        }
1594
1595        if (m_buffer.isEmpty()) {
1596            // Start gobbling chunks until we get to a suitable stopping point
1597            append(m_buffer, m_previousText.text());
1598            m_previousText.reset();
1599        }
1600        append(m_buffer, m_underlyingIterator.text());
1601    }
1602}
1603
1604StringView WordAwareIterator::text() const
1605{
1606    if (!m_buffer.isEmpty())
1607        return StringView(m_buffer.data(), m_buffer.size());
1608    if (m_previousText.text().length())
1609        return m_previousText.text();
1610    return m_underlyingIterator.text();
1611}
1612
1613// --------
1614
1615static inline UChar foldQuoteMark(UChar c)
1616{
1617    switch (c) {
1618        case hebrewPunctuationGershayim:
1619        case leftDoubleQuotationMark:
1620        case rightDoubleQuotationMark:
1621            return '"';
1622        case hebrewPunctuationGeresh:
1623        case leftSingleQuotationMark:
1624        case rightSingleQuotationMark:
1625            return '\'';
1626        default:
1627            return c;
1628    }
1629}
1630
1631// FIXME: We'd like to tailor the searcher to fold quote marks for us instead
1632// of doing it in a separate replacement pass here, but ICU doesn't offer a way
1633// to add tailoring on top of the locale-specific tailoring as of this writing.
1634static inline String foldQuoteMarks(String string)
1635{
1636    string.replace(hebrewPunctuationGeresh, '\'');
1637    string.replace(hebrewPunctuationGershayim, '"');
1638    string.replace(leftDoubleQuotationMark, '"');
1639    string.replace(leftSingleQuotationMark, '\'');
1640    string.replace(rightDoubleQuotationMark, '"');
1641    string.replace(rightSingleQuotationMark, '\'');
1642
1643    return string;
1644}
1645
1646#if !UCONFIG_NO_COLLATION
1647
1648const size_t minimumSearchBufferSize = 8192;
1649
1650#ifndef NDEBUG
1651static bool searcherInUse;
1652#endif
1653
1654static UStringSearch* createSearcher()
1655{
1656    // Provide a non-empty pattern and non-empty text so usearch_open will not fail,
1657    // but it doesn't matter exactly what it is, since we don't perform any searches
1658    // without setting both the pattern and the text.
1659    UErrorCode status = U_ZERO_ERROR;
1660    String searchCollatorName = makeString(currentSearchLocaleID(), "@collation=search");
1661    UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status);
1662    ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING);
1663    return searcher;
1664}
1665
1666static UStringSearch* searcher()
1667{
1668    static UStringSearch* searcher = createSearcher();
1669    return searcher;
1670}
1671
1672static inline void lockSearcher()
1673{
1674#ifndef NDEBUG
1675    ASSERT(!searcherInUse);
1676    searcherInUse = true;
1677#endif
1678}
1679
1680static inline void unlockSearcher()
1681{
1682#ifndef NDEBUG
1683    ASSERT(searcherInUse);
1684    searcherInUse = false;
1685#endif
1686}
1687
1688// ICU's search ignores the distinction between small kana letters and ones
1689// that are not small, and also characters that differ only in the voicing
1690// marks when considering only primary collation strength differences.
1691// This is not helpful for end users, since these differences make words
1692// distinct, so for our purposes we need these to be considered.
1693// The Unicode folks do not think the collation algorithm should be
1694// changed. To work around this, we would like to tailor the ICU searcher,
1695// but we can't get that to work yet. So instead, we check for cases where
1696// these differences occur, and skip those matches.
1697
1698// We refer to the above technique as the "kana workaround". The next few
1699// functions are helper functinos for the kana workaround.
1700
1701static inline bool isKanaLetter(UChar character)
1702{
1703    // Hiragana letters.
1704    if (character >= 0x3041 && character <= 0x3096)
1705        return true;
1706
1707    // Katakana letters.
1708    if (character >= 0x30A1 && character <= 0x30FA)
1709        return true;
1710    if (character >= 0x31F0 && character <= 0x31FF)
1711        return true;
1712
1713    // Halfwidth katakana letters.
1714    if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70)
1715        return true;
1716
1717    return false;
1718}
1719
1720static inline bool isSmallKanaLetter(UChar character)
1721{
1722    ASSERT(isKanaLetter(character));
1723
1724    switch (character) {
1725    case 0x3041: // HIRAGANA LETTER SMALL A
1726    case 0x3043: // HIRAGANA LETTER SMALL I
1727    case 0x3045: // HIRAGANA LETTER SMALL U
1728    case 0x3047: // HIRAGANA LETTER SMALL E
1729    case 0x3049: // HIRAGANA LETTER SMALL O
1730    case 0x3063: // HIRAGANA LETTER SMALL TU
1731    case 0x3083: // HIRAGANA LETTER SMALL YA
1732    case 0x3085: // HIRAGANA LETTER SMALL YU
1733    case 0x3087: // HIRAGANA LETTER SMALL YO
1734    case 0x308E: // HIRAGANA LETTER SMALL WA
1735    case 0x3095: // HIRAGANA LETTER SMALL KA
1736    case 0x3096: // HIRAGANA LETTER SMALL KE
1737    case 0x30A1: // KATAKANA LETTER SMALL A
1738    case 0x30A3: // KATAKANA LETTER SMALL I
1739    case 0x30A5: // KATAKANA LETTER SMALL U
1740    case 0x30A7: // KATAKANA LETTER SMALL E
1741    case 0x30A9: // KATAKANA LETTER SMALL O
1742    case 0x30C3: // KATAKANA LETTER SMALL TU
1743    case 0x30E3: // KATAKANA LETTER SMALL YA
1744    case 0x30E5: // KATAKANA LETTER SMALL YU
1745    case 0x30E7: // KATAKANA LETTER SMALL YO
1746    case 0x30EE: // KATAKANA LETTER SMALL WA
1747    case 0x30F5: // KATAKANA LETTER SMALL KA
1748    case 0x30F6: // KATAKANA LETTER SMALL KE
1749    case 0x31F0: // KATAKANA LETTER SMALL KU
1750    case 0x31F1: // KATAKANA LETTER SMALL SI
1751    case 0x31F2: // KATAKANA LETTER SMALL SU
1752    case 0x31F3: // KATAKANA LETTER SMALL TO
1753    case 0x31F4: // KATAKANA LETTER SMALL NU
1754    case 0x31F5: // KATAKANA LETTER SMALL HA
1755    case 0x31F6: // KATAKANA LETTER SMALL HI
1756    case 0x31F7: // KATAKANA LETTER SMALL HU
1757    case 0x31F8: // KATAKANA LETTER SMALL HE
1758    case 0x31F9: // KATAKANA LETTER SMALL HO
1759    case 0x31FA: // KATAKANA LETTER SMALL MU
1760    case 0x31FB: // KATAKANA LETTER SMALL RA
1761    case 0x31FC: // KATAKANA LETTER SMALL RI
1762    case 0x31FD: // KATAKANA LETTER SMALL RU
1763    case 0x31FE: // KATAKANA LETTER SMALL RE
1764    case 0x31FF: // KATAKANA LETTER SMALL RO
1765    case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A
1766    case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I
1767    case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U
1768    case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E
1769    case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O
1770    case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA
1771    case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU
1772    case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO
1773    case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU
1774        return true;
1775    }
1776    return false;
1777}
1778
1779enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark };
1780
1781static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character)
1782{
1783    ASSERT(isKanaLetter(character));
1784
1785    switch (character) {
1786    case 0x304C: // HIRAGANA LETTER GA
1787    case 0x304E: // HIRAGANA LETTER GI
1788    case 0x3050: // HIRAGANA LETTER GU
1789    case 0x3052: // HIRAGANA LETTER GE
1790    case 0x3054: // HIRAGANA LETTER GO
1791    case 0x3056: // HIRAGANA LETTER ZA
1792    case 0x3058: // HIRAGANA LETTER ZI
1793    case 0x305A: // HIRAGANA LETTER ZU
1794    case 0x305C: // HIRAGANA LETTER ZE
1795    case 0x305E: // HIRAGANA LETTER ZO
1796    case 0x3060: // HIRAGANA LETTER DA
1797    case 0x3062: // HIRAGANA LETTER DI
1798    case 0x3065: // HIRAGANA LETTER DU
1799    case 0x3067: // HIRAGANA LETTER DE
1800    case 0x3069: // HIRAGANA LETTER DO
1801    case 0x3070: // HIRAGANA LETTER BA
1802    case 0x3073: // HIRAGANA LETTER BI
1803    case 0x3076: // HIRAGANA LETTER BU
1804    case 0x3079: // HIRAGANA LETTER BE
1805    case 0x307C: // HIRAGANA LETTER BO
1806    case 0x3094: // HIRAGANA LETTER VU
1807    case 0x30AC: // KATAKANA LETTER GA
1808    case 0x30AE: // KATAKANA LETTER GI
1809    case 0x30B0: // KATAKANA LETTER GU
1810    case 0x30B2: // KATAKANA LETTER GE
1811    case 0x30B4: // KATAKANA LETTER GO
1812    case 0x30B6: // KATAKANA LETTER ZA
1813    case 0x30B8: // KATAKANA LETTER ZI
1814    case 0x30BA: // KATAKANA LETTER ZU
1815    case 0x30BC: // KATAKANA LETTER ZE
1816    case 0x30BE: // KATAKANA LETTER ZO
1817    case 0x30C0: // KATAKANA LETTER DA
1818    case 0x30C2: // KATAKANA LETTER DI
1819    case 0x30C5: // KATAKANA LETTER DU
1820    case 0x30C7: // KATAKANA LETTER DE
1821    case 0x30C9: // KATAKANA LETTER DO
1822    case 0x30D0: // KATAKANA LETTER BA
1823    case 0x30D3: // KATAKANA LETTER BI
1824    case 0x30D6: // KATAKANA LETTER BU
1825    case 0x30D9: // KATAKANA LETTER BE
1826    case 0x30DC: // KATAKANA LETTER BO
1827    case 0x30F4: // KATAKANA LETTER VU
1828    case 0x30F7: // KATAKANA LETTER VA
1829    case 0x30F8: // KATAKANA LETTER VI
1830    case 0x30F9: // KATAKANA LETTER VE
1831    case 0x30FA: // KATAKANA LETTER VO
1832        return VoicedSoundMark;
1833    case 0x3071: // HIRAGANA LETTER PA
1834    case 0x3074: // HIRAGANA LETTER PI
1835    case 0x3077: // HIRAGANA LETTER PU
1836    case 0x307A: // HIRAGANA LETTER PE
1837    case 0x307D: // HIRAGANA LETTER PO
1838    case 0x30D1: // KATAKANA LETTER PA
1839    case 0x30D4: // KATAKANA LETTER PI
1840    case 0x30D7: // KATAKANA LETTER PU
1841    case 0x30DA: // KATAKANA LETTER PE
1842    case 0x30DD: // KATAKANA LETTER PO
1843        return SemiVoicedSoundMark;
1844    }
1845    return NoVoicedSoundMark;
1846}
1847
1848static inline bool isCombiningVoicedSoundMark(UChar character)
1849{
1850    switch (character) {
1851    case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
1852    case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
1853        return true;
1854    }
1855    return false;
1856}
1857
1858static inline bool containsKanaLetters(const String& pattern)
1859{
1860    if (pattern.is8Bit())
1861        return false;
1862    const UChar* characters = pattern.characters16();
1863    unsigned length = pattern.length();
1864    for (unsigned i = 0; i < length; ++i) {
1865        if (isKanaLetter(characters[i]))
1866            return true;
1867    }
1868    return false;
1869}
1870
1871static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer)
1872{
1873    ASSERT(length);
1874
1875    buffer.resize(length);
1876
1877    UErrorCode status = U_ZERO_ERROR;
1878    size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status);
1879    ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR);
1880    ASSERT(bufferSize);
1881
1882    buffer.resize(bufferSize);
1883
1884    if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
1885        return;
1886
1887    status = U_ZERO_ERROR;
1888    unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status);
1889    ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
1890}
1891
1892static bool isNonLatin1Separator(UChar32 character)
1893{
1894    ASSERT_ARG(character, character >= 256);
1895
1896    return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK);
1897}
1898
1899static inline bool isSeparator(UChar32 character)
1900{
1901    static const bool latin1SeparatorTable[256] = {
1902        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1903        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1904        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . /
1905        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, //                         : ; < = > ?
1906        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   @
1907        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, //                         [ \ ] ^ _
1908        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //   `
1909        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, //                           { | } ~
1910        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1911        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1912        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
1913        1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
1914        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1915        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1916        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1917        0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0
1918    };
1919
1920    if (character < 256)
1921        return latin1SeparatorTable[character];
1922
1923    return isNonLatin1Separator(character);
1924}
1925
1926inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
1927    : m_target(foldQuoteMarks(target))
1928    , m_targetCharacters(StringView(m_target).upconvertedCharacters())
1929    , m_options(options)
1930    , m_prefixLength(0)
1931    , m_atBreak(true)
1932    , m_needsMoreContext(options & AtWordStarts)
1933    , m_targetRequiresKanaWorkaround(containsKanaLetters(m_target))
1934{
1935    ASSERT(!m_target.isEmpty());
1936
1937    size_t targetLength = m_target.length();
1938    m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize));
1939    m_overlap = m_buffer.capacity() / 4;
1940
1941    if ((m_options & AtWordStarts) && targetLength) {
1942        UChar32 targetFirstCharacter;
1943        U16_GET(m_target, 0, 0, targetLength, targetFirstCharacter);
1944        // Characters in the separator category never really occur at the beginning of a word,
1945        // so if the target begins with such a character, we just ignore the AtWordStart option.
1946        if (isSeparator(targetFirstCharacter)) {
1947            m_options &= ~AtWordStarts;
1948            m_needsMoreContext = false;
1949        }
1950    }
1951
1952    // Grab the single global searcher.
1953    // If we ever have a reason to do more than once search buffer at once, we'll have
1954    // to move to multiple searchers.
1955    lockSearcher();
1956
1957    UStringSearch* searcher = WebCore::searcher();
1958    UCollator* collator = usearch_getCollator(searcher);
1959
1960    UCollationStrength strength;
1961    USearchAttributeValue comparator;
1962    if (m_options & CaseInsensitive) {
1963        // Without loss of generality, have 'e' match {'e', 'E', 'é', 'É'} and 'é' match {'é', 'É'}.
1964        strength = UCOL_SECONDARY;
1965        comparator = USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD;
1966    } else {
1967        // Without loss of generality, have 'e' match {'e'} and 'é' match {'é'}.
1968        strength = UCOL_TERTIARY;
1969        comparator = USEARCH_STANDARD_ELEMENT_COMPARISON;
1970    }
1971    if (ucol_getStrength(collator) != strength) {
1972        ucol_setStrength(collator, strength);
1973        usearch_reset(searcher);
1974    }
1975
1976    UErrorCode status = U_ZERO_ERROR;
1977    usearch_setAttribute(searcher, USEARCH_ELEMENT_COMPARISON, comparator, &status);
1978    ASSERT(status == U_ZERO_ERROR);
1979
1980    usearch_setPattern(searcher, m_targetCharacters, targetLength, &status);
1981    ASSERT(status == U_ZERO_ERROR);
1982
1983    // The kana workaround requires a normalized copy of the target string.
1984    if (m_targetRequiresKanaWorkaround)
1985        normalizeCharacters(m_targetCharacters, targetLength, m_normalizedTarget);
1986}
1987
1988inline SearchBuffer::~SearchBuffer()
1989{
1990    // Leave the static object pointing to a valid string.
1991    UErrorCode status = U_ZERO_ERROR;
1992    usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status);
1993    ASSERT(status == U_ZERO_ERROR);
1994    usearch_setText(WebCore::searcher(), &newlineCharacter, 1, &status);
1995    ASSERT(status == U_ZERO_ERROR);
1996
1997    unlockSearcher();
1998}
1999
2000inline size_t SearchBuffer::append(StringView text)
2001{
2002    ASSERT(text.length());
2003
2004    if (m_atBreak) {
2005        m_buffer.shrink(0);
2006        m_prefixLength = 0;
2007        m_atBreak = false;
2008    } else if (m_buffer.size() == m_buffer.capacity()) {
2009        memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar));
2010        m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap);
2011        m_buffer.shrink(m_overlap);
2012    }
2013
2014    size_t oldLength = m_buffer.size();
2015    size_t usableLength = std::min<size_t>(m_buffer.capacity() - oldLength, text.length());
2016    ASSERT(usableLength);
2017    m_buffer.grow(oldLength + usableLength);
2018    for (unsigned i = 0; i < usableLength; ++i)
2019        m_buffer[oldLength + i] = foldQuoteMark(text[i]);
2020    return usableLength;
2021}
2022
2023inline bool SearchBuffer::needsMoreContext() const
2024{
2025    return m_needsMoreContext;
2026}
2027
2028inline void SearchBuffer::prependContext(StringView text)
2029{
2030    ASSERT(m_needsMoreContext);
2031    ASSERT(m_prefixLength == m_buffer.size());
2032
2033    if (!text.length())
2034        return;
2035
2036    m_atBreak = false;
2037
2038    size_t wordBoundaryContextStart = text.length();
2039    if (wordBoundaryContextStart) {
2040        U16_BACK_1(text, 0, wordBoundaryContextStart);
2041        wordBoundaryContextStart = startOfLastWordBoundaryContext(text.substring(0, wordBoundaryContextStart));
2042    }
2043
2044    size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, text.length() - wordBoundaryContextStart);
2045    WTF::append(m_buffer, text.substring(text.length() - usableLength, usableLength));
2046    m_prefixLength += usableLength;
2047
2048    if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity())
2049        m_needsMoreContext = false;
2050}
2051
2052inline bool SearchBuffer::atBreak() const
2053{
2054    return m_atBreak;
2055}
2056
2057inline void SearchBuffer::reachedBreak()
2058{
2059    m_atBreak = true;
2060}
2061
2062inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const
2063{
2064    // This function implements the kana workaround. If usearch treats
2065    // it as a match, but we do not want to, then it's a "bad match".
2066    if (!m_targetRequiresKanaWorkaround)
2067        return false;
2068
2069    // Normalize into a match buffer. We reuse a single buffer rather than
2070    // creating a new one each time.
2071    normalizeCharacters(match, matchLength, m_normalizedMatch);
2072
2073    const UChar* a = m_normalizedTarget.begin();
2074    const UChar* aEnd = m_normalizedTarget.end();
2075
2076    const UChar* b = m_normalizedMatch.begin();
2077    const UChar* bEnd = m_normalizedMatch.end();
2078
2079    while (true) {
2080        // Skip runs of non-kana-letter characters. This is necessary so we can
2081        // correctly handle strings where the target and match have different-length
2082        // runs of characters that match, while still double checking the correctness
2083        // of matches of kana letters with other kana letters.
2084        while (a != aEnd && !isKanaLetter(*a))
2085            ++a;
2086        while (b != bEnd && !isKanaLetter(*b))
2087            ++b;
2088
2089        // If we reached the end of either the target or the match, we should have
2090        // reached the end of both; both should have the same number of kana letters.
2091        if (a == aEnd || b == bEnd) {
2092            ASSERT(a == aEnd);
2093            ASSERT(b == bEnd);
2094            return false;
2095        }
2096
2097        // Check for differences in the kana letter character itself.
2098        if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b))
2099            return true;
2100        if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b))
2101            return true;
2102        ++a;
2103        ++b;
2104
2105        // Check for differences in combining voiced sound marks found after the letter.
2106        while (1) {
2107            if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) {
2108                if (b != bEnd && isCombiningVoicedSoundMark(*b))
2109                    return true;
2110                break;
2111            }
2112            if (!(b != bEnd && isCombiningVoicedSoundMark(*b)))
2113                return true;
2114            if (*a != *b)
2115                return true;
2116            ++a;
2117            ++b;
2118        }
2119    }
2120}
2121
2122inline bool SearchBuffer::isWordEndMatch(size_t start, size_t length) const
2123{
2124    ASSERT(length);
2125    ASSERT(m_options & AtWordEnds);
2126
2127    int endWord;
2128    // Start searching at the end of matched search, so that multiple word matches succeed.
2129    findEndWordBoundary(StringView(m_buffer.data(), m_buffer.size()), start + length - 1, &endWord);
2130    return static_cast<size_t>(endWord) == (start + length);
2131}
2132
2133inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const
2134{
2135    ASSERT(m_options & AtWordStarts);
2136
2137    if (!start)
2138        return true;
2139
2140    int size = m_buffer.size();
2141    int offset = start;
2142    UChar32 firstCharacter;
2143    U16_GET(m_buffer.data(), 0, offset, size, firstCharacter);
2144
2145    if (m_options & TreatMedialCapitalAsWordStart) {
2146        UChar32 previousCharacter;
2147        U16_PREV(m_buffer.data(), 0, offset, previousCharacter);
2148
2149        if (isSeparator(firstCharacter)) {
2150            // The start of a separator run is a word start (".org" in "webkit.org").
2151            if (!isSeparator(previousCharacter))
2152                return true;
2153        } else if (isASCIIUpper(firstCharacter)) {
2154            // The start of an uppercase run is a word start ("Kit" in "WebKit").
2155            if (!isASCIIUpper(previousCharacter))
2156                return true;
2157            // The last character of an uppercase run followed by a non-separator, non-digit
2158            // is a word start ("Request" in "XMLHTTPRequest").
2159            offset = start;
2160            U16_FWD_1(m_buffer.data(), offset, size);
2161            UChar32 nextCharacter = 0;
2162            if (offset < size)
2163                U16_GET(m_buffer.data(), 0, offset, size, nextCharacter);
2164            if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter))
2165                return true;
2166        } else if (isASCIIDigit(firstCharacter)) {
2167            // The start of a digit run is a word start ("2" in "WebKit2").
2168            if (!isASCIIDigit(previousCharacter))
2169                return true;
2170        } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) {
2171            // The start of a non-separator, non-uppercase, non-digit run is a word start,
2172            // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore").
2173            return true;
2174        }
2175    }
2176
2177    // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes
2178    // a word, so treat the position before any CJK character as a word start.
2179    if (Font::isCJKIdeographOrSymbol(firstCharacter))
2180        return true;
2181
2182    size_t wordBreakSearchStart = start + length;
2183    while (wordBreakSearchStart > start)
2184        wordBreakSearchStart = findNextWordFromIndex(StringView(m_buffer.data(), m_buffer.size()), wordBreakSearchStart, false /* backwards */);
2185    return wordBreakSearchStart == start;
2186}
2187
2188inline size_t SearchBuffer::search(size_t& start)
2189{
2190    size_t size = m_buffer.size();
2191    if (m_atBreak) {
2192        if (!size)
2193            return 0;
2194    } else {
2195        if (size != m_buffer.capacity())
2196            return 0;
2197    }
2198
2199    UStringSearch* searcher = WebCore::searcher();
2200
2201    UErrorCode status = U_ZERO_ERROR;
2202    usearch_setText(searcher, m_buffer.data(), size, &status);
2203    ASSERT(status == U_ZERO_ERROR);
2204
2205    usearch_setOffset(searcher, m_prefixLength, &status);
2206    ASSERT(status == U_ZERO_ERROR);
2207
2208    int matchStart = usearch_next(searcher, &status);
2209    ASSERT(status == U_ZERO_ERROR);
2210
2211nextMatch:
2212    if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) {
2213        ASSERT(matchStart == USEARCH_DONE);
2214        return 0;
2215    }
2216
2217    // Matches that start in the overlap area are only tentative.
2218    // The same match may appear later, matching more characters,
2219    // possibly including a combining character that's not yet in the buffer.
2220    if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) {
2221        size_t overlap = m_overlap;
2222        if (m_options & AtWordStarts) {
2223            // Ensure that there is sufficient context before matchStart the next time around for
2224            // determining if it is at a word boundary.
2225            unsigned wordBoundaryContextStart = matchStart;
2226            U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart);
2227            wordBoundaryContextStart = startOfLastWordBoundaryContext(StringView(m_buffer.data(), wordBoundaryContextStart));
2228            overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart));
2229        }
2230        memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar));
2231        m_prefixLength -= std::min(m_prefixLength, size - overlap);
2232        m_buffer.shrink(overlap);
2233        return 0;
2234    }
2235
2236    size_t matchedLength = usearch_getMatchedLength(searcher);
2237    ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size);
2238
2239    // If this match is "bad", move on to the next match.
2240    if (isBadMatch(m_buffer.data() + matchStart, matchedLength)
2241        || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength))
2242        || ((m_options & AtWordEnds) && !isWordEndMatch(matchStart, matchedLength))) {
2243        matchStart = usearch_next(searcher, &status);
2244        ASSERT(status == U_ZERO_ERROR);
2245        goto nextMatch;
2246    }
2247
2248    size_t newSize = size - (matchStart + 1);
2249    memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar));
2250    m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1);
2251    m_buffer.shrink(newSize);
2252
2253    start = size - matchStart;
2254    return matchedLength;
2255}
2256
2257#else
2258
2259inline SearchBuffer::SearchBuffer(const String& target, FindOptions options)
2260    : m_target(options & CaseInsensitive ? target.foldCase() : target)
2261    , m_options(options)
2262    , m_buffer(m_target.length())
2263    , m_isCharacterStartBuffer(m_target.length())
2264    , m_isBufferFull(false)
2265    , m_cursor(0)
2266{
2267    ASSERT(!m_target.isEmpty());
2268    m_target.replace(noBreakSpace, ' ');
2269    foldQuoteMarks(m_target);
2270}
2271
2272inline SearchBuffer::~SearchBuffer()
2273{
2274}
2275
2276inline void SearchBuffer::reachedBreak()
2277{
2278    m_cursor = 0;
2279    m_isBufferFull = false;
2280}
2281
2282inline bool SearchBuffer::atBreak() const
2283{
2284    return !m_cursor && !m_isBufferFull;
2285}
2286
2287inline void SearchBuffer::append(UChar c, bool isStart)
2288{
2289    m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMark(c);
2290    m_isCharacterStartBuffer[m_cursor] = isStart;
2291    if (++m_cursor == m_target.length()) {
2292        m_cursor = 0;
2293        m_isBufferFull = true;
2294    }
2295}
2296
2297inline size_t SearchBuffer::append(const UChar* characters, size_t length)
2298{
2299    ASSERT(length);
2300    if (!(m_options & CaseInsensitive)) {
2301        append(characters[0], true);
2302        return 1;
2303    }
2304    const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough
2305    UChar foldedCharacters[maxFoldedCharacters];
2306    UErrorCode status = U_ZERO_ERROR;
2307    int numFoldedCharacters = u_strFoldCase(foldedCharacters, maxFoldedCharacters, characters, 1, U_FOLD_CASE_DEFAULT, &status);
2308    ASSERT(U_SUCCESS(status));
2309    ASSERT(numFoldedCharacters);
2310    ASSERT(numFoldedCharacters <= maxFoldedCharacters);
2311    if (U_SUCCESS(status) && numFoldedCharacters) {
2312        numFoldedCharacters = std::min(numFoldedCharacters, maxFoldedCharacters);
2313        append(foldedCharacters[0], true);
2314        for (int i = 1; i < numFoldedCharacters; ++i)
2315            append(foldedCharacters[i], false);
2316    }
2317    return 1;
2318}
2319
2320inline bool SearchBuffer::needsMoreContext() const
2321{
2322    return false;
2323}
2324
2325void SearchBuffer::prependContext(const UChar*, size_t)
2326{
2327    ASSERT_NOT_REACHED();
2328}
2329
2330inline size_t SearchBuffer::search(size_t& start)
2331{
2332    if (!m_isBufferFull)
2333        return 0;
2334    if (!m_isCharacterStartBuffer[m_cursor])
2335        return 0;
2336
2337    size_t tailSpace = m_target.length() - m_cursor;
2338    if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0)
2339        return 0;
2340    if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0)
2341        return 0;
2342
2343    start = length();
2344
2345    // Now that we've found a match once, we don't want to find it again, because those
2346    // are the SearchBuffer semantics, allowing for a buffer where you append more than one
2347    // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if
2348    // we want to get rid of that in the future we could track this with a separate boolean
2349    // or even move the characters to the start of the buffer and set m_isBufferFull to false.
2350    m_isCharacterStartBuffer[m_cursor] = false;
2351
2352    return start;
2353}
2354
2355// Returns the number of characters that were appended to the buffer (what we are searching in).
2356// That's not necessarily the same length as the passed-in target string, because case folding
2357// can make two strings match even though they're not the same length.
2358size_t SearchBuffer::length() const
2359{
2360    size_t bufferSize = m_target.length();
2361    size_t length = 0;
2362    for (size_t i = 0; i < bufferSize; ++i)
2363        length += m_isCharacterStartBuffer[i];
2364    return length;
2365}
2366
2367#endif
2368
2369// --------
2370
2371int TextIterator::rangeLength(const Range* range, bool forSelectionPreservation)
2372{
2373    unsigned length = 0;
2374    for (TextIterator it(range, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance())
2375        length += it.text().length();
2376    return length;
2377}
2378
2379PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount)
2380{
2381    CharacterIterator entireRangeIterator(*entireRange);
2382    return characterSubrange(entireRangeIterator, characterOffset, characterCount);
2383}
2384
2385static inline bool isInsideReplacedElement(TextIterator& iterator)
2386{
2387    ASSERT(!iterator.atEnd());
2388    ASSERT(iterator.text().length() == 1);
2389    Node* node = iterator.node();
2390    return node && isRendererReplacedElement(node->renderer());
2391}
2392
2393PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation)
2394{
2395    RefPtr<Range> resultRange = scope->document().createRange();
2396
2397    int docTextPosition = 0;
2398    int rangeEnd = rangeLocation + rangeLength;
2399    bool startRangeFound = false;
2400
2401    RefPtr<Range> textRunRange = rangeOfContents(*scope);
2402
2403    TextIterator it(textRunRange.get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior);
2404
2405    // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>.
2406    if (!rangeLocation && !rangeLength && it.atEnd()) {
2407        resultRange->setStart(textRunRange->startContainer(), 0);
2408        resultRange->setEnd(textRunRange->startContainer(), 0);
2409        return resultRange.release();
2410    }
2411
2412    for (; !it.atEnd(); it.advance()) {
2413        int length = it.text().length();
2414        textRunRange = it.range();
2415
2416        bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + length;
2417        bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + length;
2418
2419        if (foundEnd) {
2420            // FIXME: This is a workaround for the fact that the end of a run is often at the wrong
2421            // position for emitted '\n's or if the renderer of the current node is a replaced element.
2422            if (length == 1 && (it.text()[0] == '\n' || isInsideReplacedElement(it))) {
2423                it.advance();
2424                if (!it.atEnd()) {
2425                    RefPtr<Range> range = it.range();
2426                    textRunRange->setEnd(range->startContainer(), range->startOffset());
2427                } else {
2428                    Position runStart = textRunRange->startPosition();
2429                    Position runEnd = VisiblePosition(runStart).next().deepEquivalent();
2430                    if (runEnd.isNotNull())
2431                        textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode());
2432                }
2433            }
2434        }
2435
2436        if (foundStart) {
2437            startRangeFound = true;
2438            if (textRunRange->startContainer()->isTextNode()) {
2439                int offset = rangeLocation - docTextPosition;
2440                resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset());
2441            } else {
2442                if (rangeLocation == docTextPosition)
2443                    resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset());
2444                else
2445                    resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset());
2446            }
2447        }
2448
2449        if (foundEnd) {
2450            if (textRunRange->startContainer()->isTextNode()) {
2451                int offset = rangeEnd - docTextPosition;
2452                resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset());
2453            } else {
2454                if (rangeEnd == docTextPosition)
2455                    resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset());
2456                else
2457                    resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset());
2458            }
2459            docTextPosition += length;
2460            break;
2461        }
2462
2463        docTextPosition += length;
2464    }
2465
2466    if (!startRangeFound)
2467        return nullptr;
2468
2469    if (rangeLength && rangeEnd > docTextPosition) // rangeEnd is out of bounds
2470        resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset());
2471
2472    return resultRange.release();
2473}
2474
2475bool TextIterator::getLocationAndLengthFromRange(Node* scope, const Range* range, size_t& location, size_t& length)
2476{
2477    location = notFound;
2478    length = 0;
2479
2480    if (!range->startContainer())
2481        return false;
2482
2483    // The critical assumption is that this only gets called with ranges that
2484    // concentrate on a given area containing the selection root. This is done
2485    // because of text fields and textareas. The DOM for those is not
2486    // directly in the document DOM, so ensure that the range does not cross a
2487    // boundary of one of those.
2488    if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope))
2489        return false;
2490    if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope))
2491        return false;
2492
2493    RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset());
2494    ASSERT(testRange->startContainer() == scope);
2495    location = TextIterator::rangeLength(testRange.get());
2496
2497    testRange->setEnd(range->endContainer(), range->endOffset(), IGNORE_EXCEPTION);
2498    ASSERT(testRange->startContainer() == scope);
2499    length = TextIterator::rangeLength(testRange.get()) - location;
2500    return true;
2501}
2502
2503// --------
2504
2505String plainText(const Range* r, TextIteratorBehavior defaultBehavior, bool isDisplayString)
2506{
2507    // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192
2508    static const unsigned initialCapacity = 1 << 15;
2509
2510    unsigned bufferLength = 0;
2511    StringBuilder builder;
2512    builder.reserveCapacity(initialCapacity);
2513    TextIteratorBehavior behavior = defaultBehavior;
2514    if (!isDisplayString)
2515        behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding);
2516
2517    for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) {
2518        it.appendTextToStringBuilder(builder);
2519        bufferLength += it.text().length();
2520    }
2521
2522    if (!bufferLength)
2523        return emptyString();
2524
2525    String result = builder.toString();
2526
2527    if (isDisplayString)
2528        r->ownerDocument().displayStringModifiedByEncoding(result);
2529
2530    return result;
2531}
2532
2533String plainTextReplacingNoBreakSpace(const Range* range, TextIteratorBehavior defaultBehavior, bool isDisplayString)
2534{
2535    return plainText(range, defaultBehavior, isDisplayString).replace(noBreakSpace, ' ');
2536}
2537
2538static PassRefPtr<Range> collapsedToBoundary(const Range& range, bool forward)
2539{
2540    RefPtr<Range> result = range.cloneRange(ASSERT_NO_EXCEPTION);
2541    result->collapse(!forward, ASSERT_NO_EXCEPTION);
2542    return result.release();
2543}
2544
2545static size_t findPlainText(const Range& range, const String& target, FindOptions options, size_t& matchStart)
2546{
2547    matchStart = 0;
2548    size_t matchLength = 0;
2549
2550    SearchBuffer buffer(target, options);
2551
2552    if (buffer.needsMoreContext()) {
2553        RefPtr<Range> beforeStartRange = range.ownerDocument().createRange();
2554        beforeStartRange->setEnd(range.startContainer(), range.startOffset());
2555        for (SimplifiedBackwardsTextIterator backwardsIterator(*beforeStartRange); !backwardsIterator.atEnd(); backwardsIterator.advance()) {
2556            buffer.prependContext(backwardsIterator.text());
2557            if (!buffer.needsMoreContext())
2558                break;
2559        }
2560    }
2561
2562    CharacterIterator findIterator(range, TextIteratorEntersTextControls);
2563
2564    while (!findIterator.atEnd()) {
2565        findIterator.advance(buffer.append(findIterator.text()));
2566tryAgain:
2567        size_t matchStartOffset;
2568        if (size_t newMatchLength = buffer.search(matchStartOffset)) {
2569            // Note that we found a match, and where we found it.
2570            size_t lastCharacterInBufferOffset = findIterator.characterOffset();
2571            ASSERT(lastCharacterInBufferOffset >= matchStartOffset);
2572            matchStart = lastCharacterInBufferOffset - matchStartOffset;
2573            matchLength = newMatchLength;
2574            // If searching forward, stop on the first match.
2575            // If searching backward, don't stop, so we end up with the last match.
2576            if (!(options & Backwards))
2577                break;
2578            goto tryAgain;
2579        }
2580        if (findIterator.atBreak() && !buffer.atBreak()) {
2581            buffer.reachedBreak();
2582            goto tryAgain;
2583        }
2584    }
2585
2586    return matchLength;
2587}
2588
2589PassRefPtr<Range> findPlainText(const Range& range, const String& target, FindOptions options)
2590{
2591    // First, find the text.
2592    size_t matchStart;
2593    size_t matchLength;
2594    {
2595        matchLength = findPlainText(range, target, options, matchStart);
2596        if (!matchLength)
2597            return collapsedToBoundary(range, !(options & Backwards));
2598    }
2599
2600    // Then, find the document position of the start and the end of the text.
2601    CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls);
2602    return characterSubrange(computeRangeIterator, matchStart, matchLength);
2603}
2604
2605}
2606