1/* 2 * Copyright (C) 2004-2014 Apple Inc. All rights reserved. 3 * Copyright (C) 2005 Alexey Proskuryakov. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27#include "config.h" 28#include "TextIterator.h" 29 30#include "Document.h" 31#include "ExceptionCodePlaceholder.h" 32#include "Font.h" 33#include "Frame.h" 34#include "HTMLElement.h" 35#include "HTMLNames.h" 36#include "HTMLTextFormControlElement.h" 37#include "InlineTextBox.h" 38#include "NodeTraversal.h" 39#include "RenderImage.h" 40#include "RenderIterator.h" 41#include "RenderTableCell.h" 42#include "RenderTableRow.h" 43#include "RenderTextControl.h" 44#include "RenderTextFragment.h" 45#include "ShadowRoot.h" 46#include "TextBoundaries.h" 47#include "TextBreakIterator.h" 48#include "TextControlInnerElements.h" 49#include "VisiblePosition.h" 50#include "VisibleUnits.h" 51#include "htmlediting.h" 52#include <wtf/text/CString.h> 53#include <wtf/text/StringBuilder.h> 54#include <wtf/unicode/CharacterNames.h> 55 56#if !UCONFIG_NO_COLLATION 57#include "TextBreakIteratorInternalICU.h" 58#include <unicode/usearch.h> 59#endif 60 61using namespace WTF::Unicode; 62 63namespace WebCore { 64 65using namespace HTMLNames; 66 67// Buffer that knows how to compare with a search target. 68// Keeps enough of the previous text to be able to search in the future, but no more. 69// Non-breaking spaces are always equal to normal spaces. 70// Case folding is also done if the CaseInsensitive option is specified. 71// Matches are further filtered if the AtWordStarts option is specified, although some 72// matches inside a word are permitted if TreatMedialCapitalAsWordStart is specified as well. 73class SearchBuffer { 74 WTF_MAKE_NONCOPYABLE(SearchBuffer); 75public: 76 SearchBuffer(const String& target, FindOptions); 77 ~SearchBuffer(); 78 79 // Returns number of characters appended; guaranteed to be in the range [1, length]. 80 size_t append(StringView); 81 bool needsMoreContext() const; 82 void prependContext(StringView); 83 void reachedBreak(); 84 85 // Result is the size in characters of what was found. 86 // And <startOffset> is the number of characters back to the start of what was found. 87 size_t search(size_t& startOffset); 88 bool atBreak() const; 89 90#if !UCONFIG_NO_COLLATION 91 92private: 93 bool isBadMatch(const UChar*, size_t length) const; 94 bool isWordStartMatch(size_t start, size_t length) const; 95 bool isWordEndMatch(size_t start, size_t length) const; 96 97 const String m_target; 98 const StringView::UpconvertedCharacters m_targetCharacters; 99 FindOptions m_options; 100 101 Vector<UChar> m_buffer; 102 size_t m_overlap; 103 size_t m_prefixLength; 104 bool m_atBreak; 105 bool m_needsMoreContext; 106 107 const bool m_targetRequiresKanaWorkaround; 108 Vector<UChar> m_normalizedTarget; 109 mutable Vector<UChar> m_normalizedMatch; 110 111#else 112 113private: 114 void append(UChar, bool isCharacterStart); 115 size_t length() const; 116 117 String m_target; 118 FindOptions m_options; 119 120 Vector<UChar> m_buffer; 121 Vector<bool> m_isCharacterStartBuffer; 122 bool m_isBufferFull; 123 size_t m_cursor; 124 125#endif 126}; 127 128// -------- 129 130static const unsigned bitsInWord = sizeof(unsigned) * 8; 131static const unsigned bitInWordMask = bitsInWord - 1; 132 133BitStack::BitStack() 134 : m_size(0) 135{ 136} 137 138BitStack::~BitStack() 139{ 140} 141 142void BitStack::push(bool bit) 143{ 144 unsigned index = m_size / bitsInWord; 145 unsigned shift = m_size & bitInWordMask; 146 if (!shift && index == m_words.size()) { 147 m_words.grow(index + 1); 148 m_words[index] = 0; 149 } 150 unsigned& word = m_words[index]; 151 unsigned mask = 1U << shift; 152 if (bit) 153 word |= mask; 154 else 155 word &= ~mask; 156 ++m_size; 157} 158 159void BitStack::pop() 160{ 161 if (m_size) 162 --m_size; 163} 164 165bool BitStack::top() const 166{ 167 if (!m_size) 168 return false; 169 unsigned shift = (m_size - 1) & bitInWordMask; 170 return m_words.last() & (1U << shift); 171} 172 173unsigned BitStack::size() const 174{ 175 return m_size; 176} 177 178// -------- 179 180#if !ASSERT_DISABLED 181 182static unsigned depthCrossingShadowBoundaries(Node& node) 183{ 184 unsigned depth = 0; 185 for (Node* parent = node.parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 186 ++depth; 187 return depth; 188} 189 190#endif 191 192// This function is like Range::pastLastNode, except for the fact that it can climb up out of shadow trees. 193static Node* nextInPreOrderCrossingShadowBoundaries(Node& rangeEndContainer, int rangeEndOffset) 194{ 195 if (rangeEndOffset >= 0 && !rangeEndContainer.offsetInCharacters()) { 196 if (Node* next = rangeEndContainer.childNode(rangeEndOffset)) 197 return next; 198 } 199 for (Node* node = &rangeEndContainer; node; node = node->parentOrShadowHostNode()) { 200 if (Node* next = node->nextSibling()) 201 return next; 202 } 203 return nullptr; 204} 205 206static inline bool fullyClipsContents(Node& node) 207{ 208 auto* renderer = node.renderer(); 209 if (!renderer || !renderer->isBox() || !renderer->hasOverflowClip()) 210 return false; 211 return toRenderBox(renderer)->size().isEmpty(); 212} 213 214static inline bool ignoresContainerClip(Node& node) 215{ 216 auto* renderer = node.renderer(); 217 if (!renderer || renderer->isTextOrLineBreak()) 218 return false; 219 return renderer->style().hasOutOfFlowPosition(); 220} 221 222static void pushFullyClippedState(BitStack& stack, Node& node) 223{ 224 ASSERT(stack.size() == depthCrossingShadowBoundaries(node)); 225 226 // Push true if this node full clips its contents, or if a parent already has fully 227 // clipped and this is not a node that ignores its container's clip. 228 stack.push(fullyClipsContents(node) || (stack.top() && !ignoresContainerClip(node))); 229} 230 231static void setUpFullyClippedStack(BitStack& stack, Node& node) 232{ 233 // Put the nodes in a vector so we can iterate in reverse order. 234 Vector<Node*, 100> ancestry; 235 for (Node* parent = node.parentOrShadowHostNode(); parent; parent = parent->parentOrShadowHostNode()) 236 ancestry.append(parent); 237 238 // Call pushFullyClippedState on each node starting with the earliest ancestor. 239 size_t size = ancestry.size(); 240 for (size_t i = 0; i < size; ++i) 241 pushFullyClippedState(stack, *ancestry[size - i - 1]); 242 pushFullyClippedState(stack, node); 243 244 ASSERT(stack.size() == 1 + depthCrossingShadowBoundaries(node)); 245} 246 247// FIXME: editingIgnoresContent and isRendererReplacedElement try to do the same job. 248// It's not good to have both of them. 249bool isRendererReplacedElement(RenderObject* renderer) 250{ 251 if (!renderer) 252 return false; 253 254 if (renderer->isImage() || renderer->isWidget() || renderer->isMedia()) 255 return true; 256 257 if (renderer->node() && renderer->node()->isElementNode()) { 258 Element& element = toElement(*renderer->node()); 259 if (element.isFormControlElement() || element.hasTagName(legendTag) || element.hasTagName(meterTag) || element.hasTagName(progressTag)) 260 return true; 261 if (equalIgnoringCase(element.fastGetAttribute(roleAttr), "img")) 262 return true; 263 } 264 265 return false; 266} 267 268// -------- 269 270inline void TextIteratorCopyableText::reset() 271{ 272 m_singleCharacter = 0; 273 m_string = String(); 274 m_offset = 0; 275 m_length = 0; 276} 277 278inline void TextIteratorCopyableText::set(String&& string) 279{ 280 m_singleCharacter = 0; 281 m_string = WTF::move(string); 282 m_offset = 0; 283 m_length = m_string.length(); 284} 285 286inline void TextIteratorCopyableText::set(String&& string, unsigned offset, unsigned length) 287{ 288 ASSERT(offset < string.length()); 289 ASSERT(length); 290 ASSERT(length <= string.length() - offset); 291 292 m_singleCharacter = 0; 293 m_string = WTF::move(string); 294 m_offset = offset; 295 m_length = length; 296} 297 298inline void TextIteratorCopyableText::set(UChar singleCharacter) 299{ 300 m_singleCharacter = singleCharacter; 301 m_string = String(); 302 m_offset = 0; 303 m_length = 0; 304} 305 306void TextIteratorCopyableText::appendToStringBuilder(StringBuilder& builder) const 307{ 308 if (m_singleCharacter) 309 builder.append(m_singleCharacter); 310 else 311 builder.append(m_string, m_offset, m_length); 312} 313 314// -------- 315 316TextIterator::TextIterator(const Range* range, TextIteratorBehavior behavior) 317 : m_behavior(behavior) 318 , m_handledNode(false) 319 , m_handledChildren(false) 320 , m_startContainer(nullptr) 321 , m_startOffset(0) 322 , m_endContainer(nullptr) 323 , m_endOffset(0) 324 , m_positionNode(nullptr) 325 , m_needsAnotherNewline(false) 326 , m_textBox(nullptr) 327 , m_remainingTextBox(nullptr) 328 , m_firstLetterText(nullptr) 329 , m_lastTextNode(nullptr) 330 , m_lastTextNodeEndedWithCollapsedSpace(false) 331 , m_lastCharacter(0) 332 , m_sortedTextBoxesPosition(0) 333 , m_hasEmitted(false) 334 , m_handledFirstLetter(false) 335{ 336 // FIXME: Only m_positionNode above needs to be initialized if range is null. 337 if (!range) 338 return; 339 340 range->ownerDocument().updateLayoutIgnorePendingStylesheets(); 341 342 m_startContainer = range->startContainer(); 343 if (!m_startContainer) 344 return; 345 ASSERT(range->endContainer()); 346 347 // Callers should be handing us well-formed ranges. If we discover that this isn't 348 // the case, we could consider changing this assertion to an early return. 349 ASSERT(range->boundaryPointsValid()); 350 351 m_startOffset = range->startOffset(); 352 m_endContainer = range->endContainer(); 353 m_endOffset = range->endOffset(); 354 355 // Set up the current node for processing. 356 m_node = range->firstNode(); 357 if (!m_node) 358 return; 359 setUpFullyClippedStack(m_fullyClippedStack, *m_node); 360 m_offset = m_node == m_startContainer ? m_startOffset : 0; 361 362 m_pastEndNode = nextInPreOrderCrossingShadowBoundaries(*m_endContainer, m_endOffset); 363 364#ifndef NDEBUG 365 // Need this just because of the assert in advance(). 366 m_positionNode = m_node; 367#endif 368 369 advance(); 370} 371 372TextIterator::~TextIterator() 373{ 374} 375 376void TextIterator::advance() 377{ 378 ASSERT(!atEnd()); 379 380 // reset the run information 381 m_positionNode = nullptr; 382 m_copyableText.reset(); 383 m_text = StringView(); 384 385 // handle remembered node that needed a newline after the text node's newline 386 if (m_needsAnotherNewline) { 387 // Emit the extra newline, and position it *inside* m_node, after m_node's 388 // contents, in case it's a block, in the same way that we position the first 389 // newline. The range for the emitted newline should start where the line 390 // break begins. 391 // FIXME: It would be cleaner if we emitted two newlines during the last 392 // iteration, instead of using m_needsAnotherNewline. 393 Node& baseNode = m_node->lastChild() ? *m_node->lastChild() : *m_node; 394 emitCharacter('\n', *baseNode.parentNode(), &baseNode, 1, 1); 395 m_needsAnotherNewline = false; 396 return; 397 } 398 399 if (!m_textBox && m_remainingTextBox) { 400 m_textBox = m_remainingTextBox; 401 m_remainingTextBox = nullptr; 402 m_firstLetterText = nullptr; 403 m_offset = 0; 404 } 405 // handle remembered text box 406 if (m_textBox) { 407 handleTextBox(); 408 if (m_positionNode) 409 return; 410 } 411 412 while (m_node && m_node != m_pastEndNode) { 413 if ((m_behavior & TextIteratorStopsOnFormControls) && HTMLFormControlElement::enclosingFormControlElement(m_node)) 414 return; 415 416 // if the range ends at offset 0 of an element, represent the 417 // position, but not the content, of that element e.g. if the 418 // node is a blockflow element, emit a newline that 419 // precedes the element 420 if (m_node == m_endContainer && !m_endOffset) { 421 representNodeOffsetZero(); 422 m_node = nullptr; 423 return; 424 } 425 426 auto* renderer = m_node->renderer(); 427 if (!renderer) { 428 m_handledNode = true; 429 m_handledChildren = true; 430 } else { 431 // handle current node according to its type 432 if (!m_handledNode) { 433 if (renderer->isText() && m_node->isTextNode()) 434 m_handledNode = handleTextNode(); 435 else if (isRendererReplacedElement(renderer)) 436 m_handledNode = handleReplacedElement(); 437 else 438 m_handledNode = handleNonTextNode(); 439 if (m_positionNode) 440 return; 441 } 442 } 443 444 // find a new current node to handle in depth-first manner, 445 // calling exitNode() as we come back thru a parent node 446 Node* next = m_handledChildren ? nullptr : m_node->firstChild(); 447 m_offset = 0; 448 if (!next) { 449 next = m_node->nextSibling(); 450 if (!next) { 451 bool pastEnd = NodeTraversal::next(m_node) == m_pastEndNode; 452 Node* parentNode = m_node->parentOrShadowHostNode(); 453 while (!next && parentNode) { 454 if ((pastEnd && parentNode == m_endContainer) || m_endContainer->isDescendantOf(parentNode)) 455 return; 456 bool haveRenderer = m_node->renderer(); 457 m_node = parentNode; 458 m_fullyClippedStack.pop(); 459 parentNode = m_node->parentOrShadowHostNode(); 460 if (haveRenderer) 461 exitNode(); 462 if (m_positionNode) { 463 m_handledNode = true; 464 m_handledChildren = true; 465 return; 466 } 467 next = m_node->nextSibling(); 468 } 469 } 470 m_fullyClippedStack.pop(); 471 } 472 473 // set the new current node 474 m_node = next; 475 if (m_node) 476 pushFullyClippedState(m_fullyClippedStack, *m_node); 477 m_handledNode = false; 478 m_handledChildren = false; 479 m_handledFirstLetter = false; 480 m_firstLetterText = nullptr; 481 482 // how would this ever be? 483 if (m_positionNode) 484 return; 485 } 486} 487 488static bool hasVisibleTextNode(RenderText& renderer) 489{ 490 if (renderer.style().visibility() == VISIBLE) 491 return true; 492 if (renderer.isTextFragment()) { 493 if (auto firstLetter = toRenderTextFragment(renderer).firstLetter()) { 494 if (firstLetter->style().visibility() == VISIBLE) 495 return true; 496 } 497 } 498 return false; 499} 500 501bool TextIterator::handleTextNode() 502{ 503 Text& textNode = toText(*m_node); 504 505 if (m_fullyClippedStack.top() && !(m_behavior & TextIteratorIgnoresStyleVisibility)) 506 return false; 507 508 auto& renderer = *textNode.renderer(); 509 510 m_lastTextNode = &textNode; 511 String rendererText = renderer.text(); 512 513 // handle pre-formatted text 514 if (!renderer.style().collapseWhiteSpace()) { 515 int runStart = m_offset; 516 if (m_lastTextNodeEndedWithCollapsedSpace && hasVisibleTextNode(renderer)) { 517 emitCharacter(' ', textNode, nullptr, runStart, runStart); 518 return false; 519 } 520 if (!m_handledFirstLetter && renderer.isTextFragment() && !m_offset) { 521 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 522 if (m_firstLetterText) { 523 String firstLetter = m_firstLetterText->text(); 524 emitText(textNode, *m_firstLetterText, m_offset, m_offset + firstLetter.length()); 525 m_firstLetterText = nullptr; 526 m_textBox = nullptr; 527 return false; 528 } 529 } 530 if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) 531 return false; 532 int rendererTextLength = rendererText.length(); 533 int end = (&textNode == m_endContainer) ? m_endOffset : INT_MAX; 534 int runEnd = std::min(rendererTextLength, end); 535 536 if (runStart >= runEnd) 537 return true; 538 539 emitText(textNode, renderer, runStart, runEnd); 540 return true; 541 } 542 543 if (renderer.simpleLineLayout()) { 544 if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) 545 return true; 546 // This code aims to produce same results as handleTextBox() below so test results don't change. It does not make much logical sense. 547 unsigned end = (m_node == m_endContainer) ? static_cast<unsigned>(m_endOffset) : rendererText.length(); 548 unsigned runEnd = m_offset; 549 unsigned runStart = m_offset; 550 while (runEnd < end && (renderer.style().isCollapsibleWhiteSpace(rendererText[runEnd]) || rendererText[runEnd] == '\t')) 551 ++runEnd; 552 bool addSpaceForPrevious = m_lastTextNodeEndedWithCollapsedSpace && m_lastCharacter && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter); 553 if (runEnd > runStart || addSpaceForPrevious) { 554 if (runEnd == rendererText.length()) { 555 m_lastTextNodeEndedWithCollapsedSpace = true; 556 return true; 557 } 558 bool addSpaceForCurrent = runStart || (m_lastCharacter && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter)); 559 if (addSpaceForCurrent || addSpaceForPrevious) { 560 emitCharacter(' ', textNode, nullptr, runStart, runEnd); 561 m_offset = runEnd; 562 return false; 563 } 564 runStart = runEnd; 565 } 566 while (runEnd < end && !renderer.style().isCollapsibleWhiteSpace(rendererText[runEnd])) 567 ++runEnd; 568 if (runStart < end) 569 emitText(textNode, renderer, runStart, runEnd); 570 m_offset = runEnd; 571 return runEnd == end; 572 } 573 574 if (renderer.firstTextBox()) 575 m_textBox = renderer.firstTextBox(); 576 577 bool shouldHandleFirstLetter = !m_handledFirstLetter && renderer.isTextFragment() && !m_offset; 578 if (shouldHandleFirstLetter) 579 handleTextNodeFirstLetter(toRenderTextFragment(renderer)); 580 581 if (!renderer.firstTextBox() && rendererText.length() && !shouldHandleFirstLetter) { 582 if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) 583 return false; 584 m_lastTextNodeEndedWithCollapsedSpace = true; // entire block is collapsed space 585 return true; 586 } 587 588 // Used when text boxes are out of order (Hebrew/Arabic w/ embeded LTR text) 589 auto& boxesRenderer = m_firstLetterText ? *m_firstLetterText : renderer; 590 if (boxesRenderer.containsReversedText()) { 591 m_sortedTextBoxes.clear(); 592 for (InlineTextBox* textBox = boxesRenderer.firstTextBox(); textBox; textBox = textBox->nextTextBox()) 593 m_sortedTextBoxes.append(textBox); 594 std::sort(m_sortedTextBoxes.begin(), m_sortedTextBoxes.end(), InlineTextBox::compareByStart); 595 m_sortedTextBoxesPosition = 0; 596 m_textBox = m_sortedTextBoxes.isEmpty() ? nullptr : m_sortedTextBoxes[0]; 597 } 598 599 handleTextBox(); 600 return true; 601} 602 603void TextIterator::handleTextBox() 604{ 605 Text& textNode = toText(*m_node); 606 607 auto& renderer = m_firstLetterText ? *m_firstLetterText : *textNode.renderer(); 608 if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) { 609 m_textBox = nullptr; 610 return; 611 } 612 String rendererText = renderer.text(); 613 unsigned start = m_offset; 614 unsigned end = (&textNode == m_endContainer) ? static_cast<unsigned>(m_endOffset) : UINT_MAX; 615 while (m_textBox) { 616 unsigned textBoxStart = m_textBox->start(); 617 unsigned runStart = std::max(textBoxStart, start); 618 619 // Check for collapsed space at the start of this run. 620 InlineTextBox* firstTextBox = renderer.containsReversedText() ? (m_sortedTextBoxes.isEmpty() ? nullptr : m_sortedTextBoxes[0]) : renderer.firstTextBox(); 621 bool needSpace = m_lastTextNodeEndedWithCollapsedSpace || (m_textBox == firstTextBox && textBoxStart == runStart && runStart); 622 if (needSpace && !renderer.style().isCollapsibleWhiteSpace(m_lastCharacter) && m_lastCharacter) { 623 if (m_lastTextNode == &textNode && runStart && rendererText[runStart - 1] == ' ') { 624 unsigned spaceRunStart = runStart - 1; 625 while (spaceRunStart && rendererText[spaceRunStart - 1] == ' ') 626 --spaceRunStart; 627 emitText(textNode, renderer, spaceRunStart, spaceRunStart + 1); 628 } else 629 emitCharacter(' ', textNode, nullptr, runStart, runStart); 630 return; 631 } 632 unsigned textBoxEnd = textBoxStart + m_textBox->len(); 633 unsigned runEnd = std::min(textBoxEnd, end); 634 635 // Determine what the next text box will be, but don't advance yet 636 InlineTextBox* nextTextBox = nullptr; 637 if (renderer.containsReversedText()) { 638 if (m_sortedTextBoxesPosition + 1 < m_sortedTextBoxes.size()) 639 nextTextBox = m_sortedTextBoxes[m_sortedTextBoxesPosition + 1]; 640 } else 641 nextTextBox = m_textBox->nextTextBox(); 642 ASSERT(!nextTextBox || &nextTextBox->renderer() == &renderer); 643 644 if (runStart < runEnd) { 645 // Handle either a single newline character (which becomes a space), 646 // or a run of characters that does not include a newline. 647 // This effectively translates newlines to spaces without copying the text. 648 if (rendererText[runStart] == '\n') { 649 emitCharacter(' ', textNode, nullptr, runStart, runStart + 1); 650 m_offset = runStart + 1; 651 } else { 652 size_t subrunEnd = rendererText.find('\n', runStart); 653 if (subrunEnd == notFound || subrunEnd > runEnd) { 654 subrunEnd = runEnd; 655 bool lastSpaceCollapsedByNextNonTextBox = !nextTextBox && (m_behavior & TextIteratorBehavesAsIfNodesFollowing) && rendererText.length() > runEnd; 656 if (lastSpaceCollapsedByNextNonTextBox) 657 ++subrunEnd; // runEnd stopped before last space. Increment by one to restore the space. 658 } 659 m_offset = subrunEnd; 660 emitText(textNode, renderer, runStart, subrunEnd); 661 } 662 663 // If we are doing a subrun that doesn't go to the end of the text box, 664 // come back again to finish handling this text box; don't advance to the next one. 665 if (static_cast<unsigned>(m_positionEndOffset) < textBoxEnd) 666 return; 667 668 // Advance and return 669 unsigned nextRunStart = nextTextBox ? nextTextBox->start() : rendererText.length(); 670 if (nextRunStart > runEnd) 671 m_lastTextNodeEndedWithCollapsedSpace = true; // collapsed space between runs or at the end 672 m_textBox = nextTextBox; 673 if (renderer.containsReversedText()) 674 ++m_sortedTextBoxesPosition; 675 return; 676 } 677 // Advance and continue 678 m_textBox = nextTextBox; 679 if (renderer.containsReversedText()) 680 ++m_sortedTextBoxesPosition; 681 } 682 if (!m_textBox && m_remainingTextBox) { 683 m_textBox = m_remainingTextBox; 684 m_remainingTextBox = nullptr; 685 m_firstLetterText = nullptr; 686 m_offset = 0; 687 handleTextBox(); 688 } 689} 690 691static inline RenderText* firstRenderTextInFirstLetter(RenderBoxModelObject* firstLetter) 692{ 693 if (!firstLetter) 694 return nullptr; 695 696 // FIXME: Should this check descendent objects? 697 return childrenOfType<RenderText>(*firstLetter).first(); 698} 699 700void TextIterator::handleTextNodeFirstLetter(RenderTextFragment& renderer) 701{ 702 if (auto* firstLetter = renderer.firstLetter()) { 703 if (firstLetter->style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) 704 return; 705 if (auto* firstLetterText = firstRenderTextInFirstLetter(firstLetter)) { 706 m_handledFirstLetter = true; 707 m_remainingTextBox = m_textBox; 708 m_textBox = firstLetterText->firstTextBox(); 709 m_sortedTextBoxes.clear(); 710 m_firstLetterText = firstLetterText; 711 } 712 } 713 m_handledFirstLetter = true; 714} 715 716bool TextIterator::handleReplacedElement() 717{ 718 if (m_fullyClippedStack.top()) 719 return false; 720 721 auto& renderer = *m_node->renderer(); 722 if (renderer.style().visibility() != VISIBLE && !(m_behavior & TextIteratorIgnoresStyleVisibility)) 723 return false; 724 725 if (m_lastTextNodeEndedWithCollapsedSpace) { 726 emitCharacter(' ', *m_lastTextNode->parentNode(), m_lastTextNode, 1, 1); 727 return false; 728 } 729 730 if ((m_behavior & TextIteratorEntersTextControls) && renderer.isTextControl()) { 731 if (TextControlInnerTextElement* innerTextElement = toRenderTextControl(renderer).textFormControlElement().innerTextElement()) { 732 m_node = innerTextElement->containingShadowRoot(); 733 pushFullyClippedState(m_fullyClippedStack, *m_node); 734 m_offset = 0; 735 return false; 736 } 737 } 738 739 m_hasEmitted = true; 740 741 if ((m_behavior & TextIteratorEmitsObjectReplacementCharacters) && renderer.isReplaced()) { 742 emitCharacter(objectReplacementCharacter, *m_node->parentNode(), m_node, 0, 1); 743 // Don't process subtrees for embedded objects. If the text there is required, 744 // it must be explicitly asked by specifying a range falling inside its boundaries. 745 m_handledChildren = true; 746 return true; 747 } 748 749 if (m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) { 750 // We want replaced elements to behave like punctuation for boundary 751 // finding, and to simply take up space for the selection preservation 752 // code in moveParagraphs, so we use a comma. 753 emitCharacter(',', *m_node->parentNode(), m_node, 0, 1); 754 return true; 755 } 756 757 m_positionNode = m_node->parentNode(); 758 m_positionOffsetBaseNode = m_node; 759 m_positionStartOffset = 0; 760 m_positionEndOffset = 1; 761 762 if ((m_behavior & TextIteratorEmitsImageAltText) && renderer.isRenderImage()) { 763 String altText = toRenderImage(renderer).altText(); 764 if (unsigned length = altText.length()) { 765 m_lastCharacter = altText[length - 1]; 766 m_copyableText.set(WTF::move(altText)); 767 m_text = m_copyableText.text(); 768 return true; 769 } 770 } 771 772 m_copyableText.reset(); 773 m_text = StringView(); 774 m_lastCharacter = 0; 775 return true; 776} 777 778static bool shouldEmitTabBeforeNode(Node& node) 779{ 780 auto* renderer = node.renderer(); 781 782 // Table cells are delimited by tabs. 783 if (!renderer || !isTableCell(&node)) 784 return false; 785 786 // Want a tab before every cell other than the first one. 787 RenderTableCell& cell = toRenderTableCell(*renderer); 788 RenderTable* table = cell.table(); 789 return table && (table->cellBefore(&cell) || table->cellAbove(&cell)); 790} 791 792static bool shouldEmitNewlineForNode(Node* node, bool emitsOriginalText) 793{ 794 auto* renderer = node->renderer(); 795 if (!(renderer ? renderer->isBR() : node->hasTagName(brTag))) 796 return false; 797 return emitsOriginalText || !(node->isInShadowTree() && node->shadowHost()->toInputElement()); 798} 799 800static bool hasHeaderTag(HTMLElement& element) 801{ 802 return element.hasTagName(h1Tag) 803 || element.hasTagName(h2Tag) 804 || element.hasTagName(h3Tag) 805 || element.hasTagName(h4Tag) 806 || element.hasTagName(h5Tag) 807 || element.hasTagName(h6Tag); 808} 809 810static bool shouldEmitNewlinesBeforeAndAfterNode(Node& node) 811{ 812 // Block flow (versus inline flow) is represented by having 813 // a newline both before and after the element. 814 auto* renderer = node.renderer(); 815 if (!renderer) { 816 if (!node.isHTMLElement()) 817 return false; 818 auto& element = toHTMLElement(node); 819 return hasHeaderTag(element) 820 || element.hasTagName(blockquoteTag) 821 || element.hasTagName(ddTag) 822 || element.hasTagName(divTag) 823 || element.hasTagName(dlTag) 824 || element.hasTagName(dtTag) 825 || element.hasTagName(hrTag) 826 || element.hasTagName(liTag) 827 || element.hasTagName(listingTag) 828 || element.hasTagName(olTag) 829 || element.hasTagName(pTag) 830 || element.hasTagName(preTag) 831 || element.hasTagName(trTag) 832 || element.hasTagName(ulTag); 833 } 834 835 // Need to make an exception for table cells, because they are blocks, but we 836 // want them tab-delimited rather than having newlines before and after. 837 if (isTableCell(&node)) 838 return false; 839 840 // Need to make an exception for table row elements, because they are neither 841 // "inline" or "RenderBlock", but we want newlines for them. 842 if (renderer->isTableRow()) { 843 RenderTable* table = toRenderTableRow(*renderer).table(); 844 if (table && !table->isInline()) 845 return true; 846 } 847 848 return !renderer->isInline() 849 && renderer->isRenderBlock() 850 && !renderer->isFloatingOrOutOfFlowPositioned() 851 && !renderer->isBody() 852 && !renderer->isRubyText(); 853} 854 855static bool shouldEmitNewlineAfterNode(Node& node) 856{ 857 // FIXME: It should be better but slower to create a VisiblePosition here. 858 if (!shouldEmitNewlinesBeforeAndAfterNode(node)) 859 return false; 860 // Check if this is the very last renderer in the document. 861 // If so, then we should not emit a newline. 862 Node* subsequentNode = &node; 863 while ((subsequentNode = NodeTraversal::nextSkippingChildren(subsequentNode))) { 864 if (subsequentNode->renderer()) 865 return true; 866 } 867 return false; 868} 869 870static bool shouldEmitNewlineBeforeNode(Node& node) 871{ 872 return shouldEmitNewlinesBeforeAndAfterNode(node); 873} 874 875static bool shouldEmitExtraNewlineForNode(Node& node) 876{ 877 // When there is a significant collapsed bottom margin, emit an extra 878 // newline for a more realistic result. We end up getting the right 879 // result even without margin collapsing. For example: <div><p>text</p></div> 880 // will work right even if both the <div> and the <p> have bottom margins. 881 882 auto* renderer = node.renderer(); 883 if (!renderer || !renderer->isBox()) 884 return false; 885 886 // NOTE: We only do this for a select set of nodes, and WinIE appears not to do this at all. 887 if (!node.isHTMLElement()) 888 return false; 889 if (!(hasHeaderTag(toHTMLElement(node)) || toHTMLElement(node).hasTagName(pTag))) 890 return false; 891 892 int bottomMargin = toRenderBox(renderer)->collapsedMarginAfter(); 893 int fontSize = toRenderBox(renderer)->style().fontDescription().computedPixelSize(); 894 return bottomMargin * 2 >= fontSize; 895} 896 897static int collapsedSpaceLength(RenderText& renderer, int textEnd) 898{ 899 StringImpl& text = *renderer.text(); 900 unsigned length = text.length(); 901 for (unsigned i = textEnd; i < length; ++i) { 902 if (!renderer.style().isCollapsibleWhiteSpace(text[i])) 903 return i - textEnd; 904 } 905 return length - textEnd; 906} 907 908static int maxOffsetIncludingCollapsedSpaces(Node& node) 909{ 910 int offset = caretMaxOffset(&node); 911 if (auto* renderer = node.renderer()) { 912 if (renderer->isText()) 913 offset += collapsedSpaceLength(toRenderText(*renderer), offset); 914 } 915 return offset; 916} 917 918// Whether or not we should emit a character as we enter m_node (if it's a container) or as we hit it (if it's atomic). 919bool TextIterator::shouldRepresentNodeOffsetZero() 920{ 921 if ((m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) && m_node->renderer() && m_node->renderer()->isTable()) 922 return true; 923 924 // Leave element positioned flush with start of a paragraph 925 // (e.g. do not insert tab before a table cell at the start of a paragraph) 926 if (m_lastCharacter == '\n') 927 return false; 928 929 // Otherwise, show the position if we have emitted any characters 930 if (m_hasEmitted) 931 return true; 932 933 // We've not emitted anything yet. Generally, there is no need for any positioning then. 934 // The only exception is when the element is visually not in the same line as 935 // the start of the range (e.g. the range starts at the end of the previous paragraph). 936 // NOTE: Creating VisiblePositions and comparing them is relatively expensive, so we 937 // make quicker checks to possibly avoid that. Another check that we could make is 938 // is whether the inline vs block flow changed since the previous visible element. 939 // I think we're already in a special enough case that that won't be needed, tho. 940 941 // No character needed if this is the first node in the range. 942 if (m_node == m_startContainer) 943 return false; 944 945 // If we are outside the start container's subtree, assume we need to emit. 946 // FIXME: m_startContainer could be an inline block 947 if (!m_node->isDescendantOf(m_startContainer)) 948 return true; 949 950 // If we started as m_startContainer offset 0 and the current node is a descendant of 951 // the start container, we already had enough context to correctly decide whether to 952 // emit after a preceding block. We chose not to emit (m_hasEmitted is false), 953 // so don't second guess that now. 954 // NOTE: Is this really correct when m_node is not a leftmost descendant? Probably 955 // immaterial since we likely would have already emitted something by now. 956 if (m_startOffset == 0) 957 return false; 958 959 // If this node is unrendered or invisible the VisiblePosition checks below won't have much meaning. 960 // Additionally, if the range we are iterating over contains huge sections of unrendered content, 961 // we would create VisiblePositions on every call to this function without this check. 962 if (!m_node->renderer() || m_node->renderer()->style().visibility() != VISIBLE 963 || (m_node->renderer()->isRenderBlockFlow() && !toRenderBlock(m_node->renderer())->height() && !m_node->hasTagName(bodyTag))) 964 return false; 965 966 // The startPos.isNotNull() check is needed because the start could be before the body, 967 // and in that case we'll get null. We don't want to put in newlines at the start in that case. 968 // The currPos.isNotNull() check is needed because positions in non-HTML content 969 // (like SVG) do not have visible positions, and we don't want to emit for them either. 970 VisiblePosition startPos = VisiblePosition(Position(m_startContainer, m_startOffset, Position::PositionIsOffsetInAnchor), DOWNSTREAM); 971 VisiblePosition currPos = VisiblePosition(positionBeforeNode(m_node), DOWNSTREAM); 972 return startPos.isNotNull() && currPos.isNotNull() && !inSameLine(startPos, currPos); 973} 974 975bool TextIterator::shouldEmitSpaceBeforeAndAfterNode(Node& node) 976{ 977 return node.renderer() && node.renderer()->isTable() && (node.renderer()->isInline() || (m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions)); 978} 979 980void TextIterator::representNodeOffsetZero() 981{ 982 // Emit a character to show the positioning of m_node. 983 984 // When we haven't been emitting any characters, shouldRepresentNodeOffsetZero() can 985 // create VisiblePositions, which is expensive. So, we perform the inexpensive checks 986 // on m_node to see if it necessitates emitting a character first and will early return 987 // before encountering shouldRepresentNodeOffsetZero()s worse case behavior. 988 if (shouldEmitTabBeforeNode(*m_node)) { 989 if (shouldRepresentNodeOffsetZero()) 990 emitCharacter('\t', *m_node->parentNode(), m_node, 0, 0); 991 } else if (shouldEmitNewlineBeforeNode(*m_node)) { 992 if (shouldRepresentNodeOffsetZero()) 993 emitCharacter('\n', *m_node->parentNode(), m_node, 0, 0); 994 } else if (shouldEmitSpaceBeforeAndAfterNode(*m_node)) { 995 if (shouldRepresentNodeOffsetZero()) 996 emitCharacter(' ', *m_node->parentNode(), m_node, 0, 0); 997 } 998} 999 1000bool TextIterator::handleNonTextNode() 1001{ 1002 if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText)) 1003 emitCharacter('\n', *m_node->parentNode(), m_node, 0, 1); 1004 else if ((m_behavior & TextIteratorEmitsCharactersBetweenAllVisiblePositions) && m_node->renderer() && m_node->renderer()->isHR()) 1005 emitCharacter(' ', *m_node->parentNode(), m_node, 0, 1); 1006 else 1007 representNodeOffsetZero(); 1008 1009 return true; 1010} 1011 1012void TextIterator::exitNode() 1013{ 1014 // prevent emitting a newline when exiting a collapsed block at beginning of the range 1015 // FIXME: !m_hasEmitted does not necessarily mean there was a collapsed block... it could 1016 // have been an hr (e.g.). Also, a collapsed block could have height (e.g. a table) and 1017 // therefore look like a blank line. 1018 if (!m_hasEmitted) 1019 return; 1020 1021 // Emit with a position *inside* m_node, after m_node's contents, in 1022 // case it is a block, because the run should start where the 1023 // emitted character is positioned visually. 1024 Node* baseNode = m_node->lastChild() ? m_node->lastChild() : m_node; 1025 // FIXME: This shouldn't require the m_lastTextNode to be true, but we can't change that without making 1026 // the logic in _web_attributedStringFromRange match. We'll get that for free when we switch to use 1027 // TextIterator in _web_attributedStringFromRange. 1028 // See <rdar://problem/5428427> for an example of how this mismatch will cause problems. 1029 if (m_lastTextNode && shouldEmitNewlineAfterNode(*m_node)) { 1030 // use extra newline to represent margin bottom, as needed 1031 bool addNewline = shouldEmitExtraNewlineForNode(*m_node); 1032 1033 // FIXME: We need to emit a '\n' as we leave an empty block(s) that 1034 // contain a VisiblePosition when doing selection preservation. 1035 if (m_lastCharacter != '\n') { 1036 // insert a newline with a position following this block's contents. 1037 emitCharacter('\n', *baseNode->parentNode(), baseNode, 1, 1); 1038 // remember whether to later add a newline for the current node 1039 ASSERT(!m_needsAnotherNewline); 1040 m_needsAnotherNewline = addNewline; 1041 } else if (addNewline) 1042 // insert a newline with a position following this block's contents. 1043 emitCharacter('\n', *baseNode->parentNode(), baseNode, 1, 1); 1044 } 1045 1046 // If nothing was emitted, see if we need to emit a space. 1047 if (!m_positionNode && shouldEmitSpaceBeforeAndAfterNode(*m_node)) 1048 emitCharacter(' ', *baseNode->parentNode(), baseNode, 1, 1); 1049} 1050 1051void TextIterator::emitCharacter(UChar character, Node& characterNode, Node* offsetBaseNode, int textStartOffset, int textEndOffset) 1052{ 1053 m_hasEmitted = true; 1054 1055 // remember information with which to construct the TextIterator::range() 1056 m_positionNode = &characterNode; 1057 m_positionOffsetBaseNode = offsetBaseNode; 1058 m_positionStartOffset = textStartOffset; 1059 m_positionEndOffset = textEndOffset; 1060 1061 m_copyableText.set(character); 1062 m_text = m_copyableText.text(); 1063 m_lastCharacter = character; 1064 m_lastTextNodeEndedWithCollapsedSpace = false; 1065} 1066 1067void TextIterator::emitText(Text& textNode, RenderText& renderer, int textStartOffset, int textEndOffset) 1068{ 1069 ASSERT(textStartOffset >= 0); 1070 ASSERT(textEndOffset >= 0); 1071 ASSERT(textStartOffset <= textEndOffset); 1072 1073 // FIXME: This probably yields the wrong offsets when text-transform: lowercase turns a single character into two characters. 1074 String string = (m_behavior & TextIteratorEmitsOriginalText) ? renderer.originalText() 1075 : ((m_behavior & TextIteratorEmitsTextsWithoutTranscoding) ? renderer.textWithoutConvertingBackslashToYenSymbol() : renderer.text()); 1076 1077 ASSERT(string.length() >= static_cast<unsigned>(textEndOffset)); 1078 1079 m_positionNode = &textNode; 1080 m_positionOffsetBaseNode = nullptr; 1081 m_positionStartOffset = textStartOffset; 1082 m_positionEndOffset = textEndOffset; 1083 1084 m_lastCharacter = string[textEndOffset - 1]; 1085 m_copyableText.set(WTF::move(string), textStartOffset, textEndOffset - textStartOffset); 1086 m_text = m_copyableText.text(); 1087 1088 m_lastTextNodeEndedWithCollapsedSpace = false; 1089 m_hasEmitted = true; 1090} 1091 1092PassRefPtr<Range> TextIterator::range() const 1093{ 1094 ASSERT(!atEnd()); 1095 1096 // use the current run information, if we have it 1097 if (m_positionOffsetBaseNode) { 1098 int index = m_positionOffsetBaseNode->nodeIndex(); 1099 m_positionStartOffset += index; 1100 m_positionEndOffset += index; 1101 m_positionOffsetBaseNode = nullptr; 1102 } 1103 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1104} 1105 1106Node* TextIterator::node() const 1107{ 1108 RefPtr<Range> textRange = range(); 1109 1110 Node* node = textRange->startContainer(); 1111 if (node->offsetInCharacters()) 1112 return node; 1113 1114 return node->childNode(textRange->startOffset()); 1115} 1116 1117// -------- 1118 1119SimplifiedBackwardsTextIterator::SimplifiedBackwardsTextIterator(const Range& range, TextIteratorBehavior behavior) 1120 : m_behavior(behavior) 1121 , m_node(nullptr) 1122 , m_offset(0) 1123 , m_handledNode(false) 1124 , m_handledChildren(false) 1125 , m_startContainer(nullptr) 1126 , m_startOffset(0) 1127 , m_endContainer(nullptr) 1128 , m_endOffset(0) 1129 , m_positionNode(nullptr) 1130 , m_positionStartOffset(0) 1131 , m_positionEndOffset(0) 1132 , m_lastTextNode(nullptr) 1133 , m_lastCharacter(0) 1134 , m_havePassedStartContainer(false) 1135 , m_shouldHandleFirstLetter(false) 1136{ 1137 ASSERT(behavior == TextIteratorDefaultBehavior || behavior == TextIteratorStopsOnFormControls); 1138 1139 range.ownerDocument().updateLayoutIgnorePendingStylesheets(); 1140 1141 Node* startNode = range.startContainer(); 1142 if (!startNode) 1143 return; 1144 Node* endNode = range.endContainer(); 1145 int startOffset = range.startOffset(); 1146 int endOffset = range.endOffset(); 1147 1148 if (!startNode->offsetInCharacters()) { 1149 if (startOffset >= 0 && startOffset < static_cast<int>(startNode->childNodeCount())) { 1150 startNode = startNode->childNode(startOffset); 1151 startOffset = 0; 1152 } 1153 } 1154 if (!endNode->offsetInCharacters()) { 1155 if (endOffset > 0 && endOffset <= static_cast<int>(endNode->childNodeCount())) { 1156 endNode = endNode->childNode(endOffset - 1); 1157 endOffset = lastOffsetInNode(endNode); 1158 } 1159 } 1160 1161 m_node = endNode; 1162 setUpFullyClippedStack(m_fullyClippedStack, *m_node); 1163 m_offset = endOffset; 1164 m_handledNode = false; 1165 m_handledChildren = endOffset == 0; 1166 1167 m_startContainer = startNode; 1168 m_startOffset = startOffset; 1169 m_endContainer = endNode; 1170 m_endOffset = endOffset; 1171 1172#ifndef NDEBUG 1173 // Need this just because of the assert. 1174 m_positionNode = endNode; 1175#endif 1176 1177 m_lastTextNode = nullptr; 1178 m_lastCharacter = '\n'; 1179 1180 m_havePassedStartContainer = false; 1181 1182 advance(); 1183} 1184 1185void SimplifiedBackwardsTextIterator::advance() 1186{ 1187 ASSERT(!atEnd()); 1188 1189 m_positionNode = nullptr; 1190 m_copyableText.reset(); 1191 m_text = StringView(); 1192 1193 if ((m_behavior & TextIteratorStopsOnFormControls) && HTMLFormControlElement::enclosingFormControlElement(m_node)) 1194 return; 1195 1196 while (m_node && !m_havePassedStartContainer) { 1197 // Don't handle node if we start iterating at [node, 0]. 1198 if (!m_handledNode && !(m_node == m_endContainer && !m_endOffset)) { 1199 auto* renderer = m_node->renderer(); 1200 if (renderer && renderer->isText() && m_node->isTextNode()) { 1201 if (renderer->style().visibility() == VISIBLE && m_offset > 0) 1202 m_handledNode = handleTextNode(); 1203 } else if (renderer && (renderer->isImage() || renderer->isWidget())) { 1204 if (renderer->style().visibility() == VISIBLE && m_offset > 0) 1205 m_handledNode = handleReplacedElement(); 1206 } else 1207 m_handledNode = handleNonTextNode(); 1208 if (m_positionNode) 1209 return; 1210 } 1211 1212 if (!m_handledChildren && m_node->hasChildNodes()) { 1213 m_node = m_node->lastChild(); 1214 pushFullyClippedState(m_fullyClippedStack, *m_node); 1215 } else { 1216 // Exit empty containers as we pass over them or containers 1217 // where [container, 0] is where we started iterating. 1218 if (!m_handledNode 1219 && canHaveChildrenForEditing(m_node) 1220 && m_node->parentNode() 1221 && (!m_node->lastChild() || (m_node == m_endContainer && !m_endOffset))) { 1222 exitNode(); 1223 if (m_positionNode) { 1224 m_handledNode = true; 1225 m_handledChildren = true; 1226 return; 1227 } 1228 } 1229 1230 // Exit all other containers. 1231 while (!m_node->previousSibling()) { 1232 if (!advanceRespectingRange(m_node->parentOrShadowHostNode())) 1233 break; 1234 m_fullyClippedStack.pop(); 1235 exitNode(); 1236 if (m_positionNode) { 1237 m_handledNode = true; 1238 m_handledChildren = true; 1239 return; 1240 } 1241 } 1242 1243 m_fullyClippedStack.pop(); 1244 if (advanceRespectingRange(m_node->previousSibling())) 1245 pushFullyClippedState(m_fullyClippedStack, *m_node); 1246 else 1247 m_node = nullptr; 1248 } 1249 1250 // For the purpose of word boundary detection, 1251 // we should iterate all visible text and trailing (collapsed) whitespaces. 1252 m_offset = m_node ? maxOffsetIncludingCollapsedSpaces(*m_node) : 0; 1253 m_handledNode = false; 1254 m_handledChildren = false; 1255 1256 if (m_positionNode) 1257 return; 1258 } 1259} 1260 1261bool SimplifiedBackwardsTextIterator::handleTextNode() 1262{ 1263 Text& textNode = toText(*m_node); 1264 1265 m_lastTextNode = &textNode; 1266 1267 int startOffset; 1268 int offsetInNode; 1269 RenderText* renderer = handleFirstLetter(startOffset, offsetInNode); 1270 if (!renderer) 1271 return true; 1272 1273 String text = renderer->text(); 1274 if (!renderer->hasRenderedText() && text.length()) 1275 return true; 1276 1277 if (startOffset + offsetInNode == m_offset) { 1278 ASSERT(!m_shouldHandleFirstLetter); 1279 return true; 1280 } 1281 1282 m_positionEndOffset = m_offset; 1283 m_offset = startOffset + offsetInNode; 1284 m_positionNode = m_node; 1285 m_positionStartOffset = m_offset; 1286 1287 ASSERT(m_positionStartOffset < m_positionEndOffset); 1288 ASSERT(m_positionStartOffset - offsetInNode >= 0); 1289 ASSERT(m_positionEndOffset - offsetInNode > 0); 1290 ASSERT(m_positionEndOffset - offsetInNode <= static_cast<int>(text.length())); 1291 1292 m_lastCharacter = text[m_positionEndOffset - offsetInNode - 1]; 1293 m_copyableText.set(WTF::move(text), m_positionStartOffset - offsetInNode, m_positionEndOffset - m_positionStartOffset); 1294 m_text = m_copyableText.text(); 1295 1296 return !m_shouldHandleFirstLetter; 1297} 1298 1299RenderText* SimplifiedBackwardsTextIterator::handleFirstLetter(int& startOffset, int& offsetInNode) 1300{ 1301 RenderText* renderer = toRenderText(m_node->renderer()); 1302 startOffset = (m_node == m_startContainer) ? m_startOffset : 0; 1303 1304 if (!renderer->isTextFragment()) { 1305 offsetInNode = 0; 1306 return renderer; 1307 } 1308 1309 RenderTextFragment* fragment = toRenderTextFragment(renderer); 1310 int offsetAfterFirstLetter = fragment->start(); 1311 if (startOffset >= offsetAfterFirstLetter) { 1312 ASSERT(!m_shouldHandleFirstLetter); 1313 offsetInNode = offsetAfterFirstLetter; 1314 return renderer; 1315 } 1316 1317 if (!m_shouldHandleFirstLetter && startOffset + offsetAfterFirstLetter < m_offset) { 1318 m_shouldHandleFirstLetter = true; 1319 offsetInNode = offsetAfterFirstLetter; 1320 return renderer; 1321 } 1322 1323 m_shouldHandleFirstLetter = false; 1324 offsetInNode = 0; 1325 return firstRenderTextInFirstLetter(fragment->firstLetter()); 1326} 1327 1328bool SimplifiedBackwardsTextIterator::handleReplacedElement() 1329{ 1330 unsigned index = m_node->nodeIndex(); 1331 // We want replaced elements to behave like punctuation for boundary 1332 // finding, and to simply take up space for the selection preservation 1333 // code in moveParagraphs, so we use a comma. Unconditionally emit 1334 // here because this iterator is only used for boundary finding. 1335 emitCharacter(',', *m_node->parentNode(), index, index + 1); 1336 return true; 1337} 1338 1339bool SimplifiedBackwardsTextIterator::handleNonTextNode() 1340{ 1341 // We can use a linefeed in place of a tab because this simple iterator is only used to 1342 // find boundaries, not actual content. A linefeed breaks words, sentences, and paragraphs. 1343 if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText) || shouldEmitNewlineAfterNode(*m_node) || shouldEmitTabBeforeNode(*m_node)) { 1344 unsigned index = m_node->nodeIndex(); 1345 // The start of this emitted range is wrong. Ensuring correctness would require 1346 // VisiblePositions and so would be slow. previousBoundary expects this. 1347 emitCharacter('\n', *m_node->parentNode(), index + 1, index + 1); 1348 } 1349 return true; 1350} 1351 1352void SimplifiedBackwardsTextIterator::exitNode() 1353{ 1354 if (shouldEmitNewlineForNode(m_node, m_behavior & TextIteratorEmitsOriginalText) || shouldEmitNewlineBeforeNode(*m_node) || shouldEmitTabBeforeNode(*m_node)) { 1355 // The start of this emitted range is wrong. Ensuring correctness would require 1356 // VisiblePositions and so would be slow. previousBoundary expects this. 1357 emitCharacter('\n', *m_node, 0, 0); 1358 } 1359} 1360 1361void SimplifiedBackwardsTextIterator::emitCharacter(UChar c, Node& node, int startOffset, int endOffset) 1362{ 1363 m_positionNode = &node; 1364 m_positionStartOffset = startOffset; 1365 m_positionEndOffset = endOffset; 1366 m_copyableText.set(c); 1367 m_text = m_copyableText.text(); 1368 m_lastCharacter = c; 1369} 1370 1371bool SimplifiedBackwardsTextIterator::advanceRespectingRange(Node* next) 1372{ 1373 if (!next) 1374 return false; 1375 m_havePassedStartContainer |= m_node == m_startContainer; 1376 if (m_havePassedStartContainer) 1377 return false; 1378 m_node = next; 1379 return true; 1380} 1381 1382PassRefPtr<Range> SimplifiedBackwardsTextIterator::range() const 1383{ 1384 ASSERT(!atEnd()); 1385 1386 return Range::create(m_positionNode->document(), m_positionNode, m_positionStartOffset, m_positionNode, m_positionEndOffset); 1387} 1388 1389// -------- 1390 1391CharacterIterator::CharacterIterator(const Range& range, TextIteratorBehavior behavior) 1392 : m_underlyingIterator(&range, behavior) 1393 , m_offset(0) 1394 , m_runOffset(0) 1395 , m_atBreak(true) 1396{ 1397 while (!atEnd() && !m_underlyingIterator.text().length()) 1398 m_underlyingIterator.advance(); 1399} 1400 1401PassRefPtr<Range> CharacterIterator::range() const 1402{ 1403 RefPtr<Range> r = m_underlyingIterator.range(); 1404 if (!m_underlyingIterator.atEnd()) { 1405 if (m_underlyingIterator.text().length() <= 1) { 1406 ASSERT(m_runOffset == 0); 1407 } else { 1408 Node* n = r->startContainer(); 1409 ASSERT(n == r->endContainer()); 1410 int offset = r->startOffset() + m_runOffset; 1411 r->setStart(n, offset); 1412 r->setEnd(n, offset + 1); 1413 } 1414 } 1415 return r.release(); 1416} 1417 1418void CharacterIterator::advance(int count) 1419{ 1420 if (count <= 0) { 1421 ASSERT(count == 0); 1422 return; 1423 } 1424 1425 m_atBreak = false; 1426 1427 // easy if there is enough left in the current m_underlyingIterator run 1428 int remaining = m_underlyingIterator.text().length() - m_runOffset; 1429 if (count < remaining) { 1430 m_runOffset += count; 1431 m_offset += count; 1432 return; 1433 } 1434 1435 // exhaust the current m_underlyingIterator run 1436 count -= remaining; 1437 m_offset += remaining; 1438 1439 // move to a subsequent m_underlyingIterator run 1440 for (m_underlyingIterator.advance(); !atEnd(); m_underlyingIterator.advance()) { 1441 int runLength = m_underlyingIterator.text().length(); 1442 if (!runLength) 1443 m_atBreak = true; 1444 else { 1445 // see whether this is m_underlyingIterator to use 1446 if (count < runLength) { 1447 m_runOffset = count; 1448 m_offset += count; 1449 return; 1450 } 1451 1452 // exhaust this m_underlyingIterator run 1453 count -= runLength; 1454 m_offset += runLength; 1455 } 1456 } 1457 1458 // ran to the end of the m_underlyingIterator... no more runs left 1459 m_atBreak = true; 1460 m_runOffset = 0; 1461} 1462 1463static PassRefPtr<Range> characterSubrange(CharacterIterator& it, int offset, int length) 1464{ 1465 it.advance(offset); 1466 RefPtr<Range> start = it.range(); 1467 1468 if (length > 1) 1469 it.advance(length - 1); 1470 RefPtr<Range> end = it.range(); 1471 1472 return Range::create(start->startContainer()->document(), 1473 start->startContainer(), start->startOffset(), 1474 end->endContainer(), end->endOffset()); 1475} 1476 1477BackwardsCharacterIterator::BackwardsCharacterIterator(const Range& range) 1478 : m_underlyingIterator(range, TextIteratorDefaultBehavior) 1479 , m_offset(0) 1480 , m_runOffset(0) 1481 , m_atBreak(true) 1482{ 1483 while (!atEnd() && !m_underlyingIterator.text().length()) 1484 m_underlyingIterator.advance(); 1485} 1486 1487PassRefPtr<Range> BackwardsCharacterIterator::range() const 1488{ 1489 RefPtr<Range> r = m_underlyingIterator.range(); 1490 if (!m_underlyingIterator.atEnd()) { 1491 if (m_underlyingIterator.text().length() <= 1) 1492 ASSERT(m_runOffset == 0); 1493 else { 1494 Node* n = r->startContainer(); 1495 ASSERT(n == r->endContainer()); 1496 int offset = r->endOffset() - m_runOffset; 1497 r->setStart(n, offset - 1); 1498 r->setEnd(n, offset); 1499 } 1500 } 1501 return r.release(); 1502} 1503 1504void BackwardsCharacterIterator::advance(int count) 1505{ 1506 if (count <= 0) { 1507 ASSERT(!count); 1508 return; 1509 } 1510 1511 m_atBreak = false; 1512 1513 int remaining = m_underlyingIterator.text().length() - m_runOffset; 1514 if (count < remaining) { 1515 m_runOffset += count; 1516 m_offset += count; 1517 return; 1518 } 1519 1520 count -= remaining; 1521 m_offset += remaining; 1522 1523 for (m_underlyingIterator.advance(); !atEnd(); m_underlyingIterator.advance()) { 1524 int runLength = m_underlyingIterator.text().length(); 1525 if (runLength == 0) 1526 m_atBreak = true; 1527 else { 1528 if (count < runLength) { 1529 m_runOffset = count; 1530 m_offset += count; 1531 return; 1532 } 1533 1534 count -= runLength; 1535 m_offset += runLength; 1536 } 1537 } 1538 1539 m_atBreak = true; 1540 m_runOffset = 0; 1541} 1542 1543// -------- 1544 1545WordAwareIterator::WordAwareIterator(const Range& range) 1546 : m_underlyingIterator(&range) 1547 , m_didLookAhead(true) // so we consider the first chunk from the text iterator 1548{ 1549 advance(); // get in position over the first chunk of text 1550} 1551 1552// We're always in one of these modes: 1553// - The current chunk in the text iterator is our current chunk 1554// (typically its a piece of whitespace, or text that ended with whitespace) 1555// - The previous chunk in the text iterator is our current chunk 1556// (we looked ahead to the next chunk and found a word boundary) 1557// - We built up our own chunk of text from many chunks from the text iterator 1558 1559// FIXME: Performance could be bad for huge spans next to each other that don't fall on word boundaries. 1560 1561void WordAwareIterator::advance() 1562{ 1563 m_previousText.reset(); 1564 m_buffer.clear(); 1565 1566 // If last time we did a look-ahead, start with that looked-ahead chunk now 1567 if (!m_didLookAhead) { 1568 ASSERT(!m_underlyingIterator.atEnd()); 1569 m_underlyingIterator.advance(); 1570 } 1571 m_didLookAhead = false; 1572 1573 // Go to next non-empty chunk 1574 while (!m_underlyingIterator.atEnd() && !m_underlyingIterator.text().length()) 1575 m_underlyingIterator.advance(); 1576 if (m_underlyingIterator.atEnd()) 1577 return; 1578 1579 while (1) { 1580 // If this chunk ends in whitespace we can just use it as our chunk. 1581 if (isSpaceOrNewline(m_underlyingIterator.text()[m_underlyingIterator.text().length() - 1])) 1582 return; 1583 1584 // If this is the first chunk that failed, save it in previousText before look ahead 1585 if (m_buffer.isEmpty()) 1586 m_previousText = m_underlyingIterator.copyableText(); 1587 1588 // Look ahead to next chunk. If it is whitespace or a break, we can use the previous stuff 1589 m_underlyingIterator.advance(); 1590 if (m_underlyingIterator.atEnd() || !m_underlyingIterator.text().length() || isSpaceOrNewline(m_underlyingIterator.text()[0])) { 1591 m_didLookAhead = true; 1592 return; 1593 } 1594 1595 if (m_buffer.isEmpty()) { 1596 // Start gobbling chunks until we get to a suitable stopping point 1597 append(m_buffer, m_previousText.text()); 1598 m_previousText.reset(); 1599 } 1600 append(m_buffer, m_underlyingIterator.text()); 1601 } 1602} 1603 1604StringView WordAwareIterator::text() const 1605{ 1606 if (!m_buffer.isEmpty()) 1607 return StringView(m_buffer.data(), m_buffer.size()); 1608 if (m_previousText.text().length()) 1609 return m_previousText.text(); 1610 return m_underlyingIterator.text(); 1611} 1612 1613// -------- 1614 1615static inline UChar foldQuoteMark(UChar c) 1616{ 1617 switch (c) { 1618 case hebrewPunctuationGershayim: 1619 case leftDoubleQuotationMark: 1620 case rightDoubleQuotationMark: 1621 return '"'; 1622 case hebrewPunctuationGeresh: 1623 case leftSingleQuotationMark: 1624 case rightSingleQuotationMark: 1625 return '\''; 1626 default: 1627 return c; 1628 } 1629} 1630 1631// FIXME: We'd like to tailor the searcher to fold quote marks for us instead 1632// of doing it in a separate replacement pass here, but ICU doesn't offer a way 1633// to add tailoring on top of the locale-specific tailoring as of this writing. 1634static inline String foldQuoteMarks(String string) 1635{ 1636 string.replace(hebrewPunctuationGeresh, '\''); 1637 string.replace(hebrewPunctuationGershayim, '"'); 1638 string.replace(leftDoubleQuotationMark, '"'); 1639 string.replace(leftSingleQuotationMark, '\''); 1640 string.replace(rightDoubleQuotationMark, '"'); 1641 string.replace(rightSingleQuotationMark, '\''); 1642 1643 return string; 1644} 1645 1646#if !UCONFIG_NO_COLLATION 1647 1648const size_t minimumSearchBufferSize = 8192; 1649 1650#ifndef NDEBUG 1651static bool searcherInUse; 1652#endif 1653 1654static UStringSearch* createSearcher() 1655{ 1656 // Provide a non-empty pattern and non-empty text so usearch_open will not fail, 1657 // but it doesn't matter exactly what it is, since we don't perform any searches 1658 // without setting both the pattern and the text. 1659 UErrorCode status = U_ZERO_ERROR; 1660 String searchCollatorName = makeString(currentSearchLocaleID(), "@collation=search"); 1661 UStringSearch* searcher = usearch_open(&newlineCharacter, 1, &newlineCharacter, 1, searchCollatorName.utf8().data(), 0, &status); 1662 ASSERT(status == U_ZERO_ERROR || status == U_USING_FALLBACK_WARNING || status == U_USING_DEFAULT_WARNING); 1663 return searcher; 1664} 1665 1666static UStringSearch* searcher() 1667{ 1668 static UStringSearch* searcher = createSearcher(); 1669 return searcher; 1670} 1671 1672static inline void lockSearcher() 1673{ 1674#ifndef NDEBUG 1675 ASSERT(!searcherInUse); 1676 searcherInUse = true; 1677#endif 1678} 1679 1680static inline void unlockSearcher() 1681{ 1682#ifndef NDEBUG 1683 ASSERT(searcherInUse); 1684 searcherInUse = false; 1685#endif 1686} 1687 1688// ICU's search ignores the distinction between small kana letters and ones 1689// that are not small, and also characters that differ only in the voicing 1690// marks when considering only primary collation strength differences. 1691// This is not helpful for end users, since these differences make words 1692// distinct, so for our purposes we need these to be considered. 1693// The Unicode folks do not think the collation algorithm should be 1694// changed. To work around this, we would like to tailor the ICU searcher, 1695// but we can't get that to work yet. So instead, we check for cases where 1696// these differences occur, and skip those matches. 1697 1698// We refer to the above technique as the "kana workaround". The next few 1699// functions are helper functinos for the kana workaround. 1700 1701static inline bool isKanaLetter(UChar character) 1702{ 1703 // Hiragana letters. 1704 if (character >= 0x3041 && character <= 0x3096) 1705 return true; 1706 1707 // Katakana letters. 1708 if (character >= 0x30A1 && character <= 0x30FA) 1709 return true; 1710 if (character >= 0x31F0 && character <= 0x31FF) 1711 return true; 1712 1713 // Halfwidth katakana letters. 1714 if (character >= 0xFF66 && character <= 0xFF9D && character != 0xFF70) 1715 return true; 1716 1717 return false; 1718} 1719 1720static inline bool isSmallKanaLetter(UChar character) 1721{ 1722 ASSERT(isKanaLetter(character)); 1723 1724 switch (character) { 1725 case 0x3041: // HIRAGANA LETTER SMALL A 1726 case 0x3043: // HIRAGANA LETTER SMALL I 1727 case 0x3045: // HIRAGANA LETTER SMALL U 1728 case 0x3047: // HIRAGANA LETTER SMALL E 1729 case 0x3049: // HIRAGANA LETTER SMALL O 1730 case 0x3063: // HIRAGANA LETTER SMALL TU 1731 case 0x3083: // HIRAGANA LETTER SMALL YA 1732 case 0x3085: // HIRAGANA LETTER SMALL YU 1733 case 0x3087: // HIRAGANA LETTER SMALL YO 1734 case 0x308E: // HIRAGANA LETTER SMALL WA 1735 case 0x3095: // HIRAGANA LETTER SMALL KA 1736 case 0x3096: // HIRAGANA LETTER SMALL KE 1737 case 0x30A1: // KATAKANA LETTER SMALL A 1738 case 0x30A3: // KATAKANA LETTER SMALL I 1739 case 0x30A5: // KATAKANA LETTER SMALL U 1740 case 0x30A7: // KATAKANA LETTER SMALL E 1741 case 0x30A9: // KATAKANA LETTER SMALL O 1742 case 0x30C3: // KATAKANA LETTER SMALL TU 1743 case 0x30E3: // KATAKANA LETTER SMALL YA 1744 case 0x30E5: // KATAKANA LETTER SMALL YU 1745 case 0x30E7: // KATAKANA LETTER SMALL YO 1746 case 0x30EE: // KATAKANA LETTER SMALL WA 1747 case 0x30F5: // KATAKANA LETTER SMALL KA 1748 case 0x30F6: // KATAKANA LETTER SMALL KE 1749 case 0x31F0: // KATAKANA LETTER SMALL KU 1750 case 0x31F1: // KATAKANA LETTER SMALL SI 1751 case 0x31F2: // KATAKANA LETTER SMALL SU 1752 case 0x31F3: // KATAKANA LETTER SMALL TO 1753 case 0x31F4: // KATAKANA LETTER SMALL NU 1754 case 0x31F5: // KATAKANA LETTER SMALL HA 1755 case 0x31F6: // KATAKANA LETTER SMALL HI 1756 case 0x31F7: // KATAKANA LETTER SMALL HU 1757 case 0x31F8: // KATAKANA LETTER SMALL HE 1758 case 0x31F9: // KATAKANA LETTER SMALL HO 1759 case 0x31FA: // KATAKANA LETTER SMALL MU 1760 case 0x31FB: // KATAKANA LETTER SMALL RA 1761 case 0x31FC: // KATAKANA LETTER SMALL RI 1762 case 0x31FD: // KATAKANA LETTER SMALL RU 1763 case 0x31FE: // KATAKANA LETTER SMALL RE 1764 case 0x31FF: // KATAKANA LETTER SMALL RO 1765 case 0xFF67: // HALFWIDTH KATAKANA LETTER SMALL A 1766 case 0xFF68: // HALFWIDTH KATAKANA LETTER SMALL I 1767 case 0xFF69: // HALFWIDTH KATAKANA LETTER SMALL U 1768 case 0xFF6A: // HALFWIDTH KATAKANA LETTER SMALL E 1769 case 0xFF6B: // HALFWIDTH KATAKANA LETTER SMALL O 1770 case 0xFF6C: // HALFWIDTH KATAKANA LETTER SMALL YA 1771 case 0xFF6D: // HALFWIDTH KATAKANA LETTER SMALL YU 1772 case 0xFF6E: // HALFWIDTH KATAKANA LETTER SMALL YO 1773 case 0xFF6F: // HALFWIDTH KATAKANA LETTER SMALL TU 1774 return true; 1775 } 1776 return false; 1777} 1778 1779enum VoicedSoundMarkType { NoVoicedSoundMark, VoicedSoundMark, SemiVoicedSoundMark }; 1780 1781static inline VoicedSoundMarkType composedVoicedSoundMark(UChar character) 1782{ 1783 ASSERT(isKanaLetter(character)); 1784 1785 switch (character) { 1786 case 0x304C: // HIRAGANA LETTER GA 1787 case 0x304E: // HIRAGANA LETTER GI 1788 case 0x3050: // HIRAGANA LETTER GU 1789 case 0x3052: // HIRAGANA LETTER GE 1790 case 0x3054: // HIRAGANA LETTER GO 1791 case 0x3056: // HIRAGANA LETTER ZA 1792 case 0x3058: // HIRAGANA LETTER ZI 1793 case 0x305A: // HIRAGANA LETTER ZU 1794 case 0x305C: // HIRAGANA LETTER ZE 1795 case 0x305E: // HIRAGANA LETTER ZO 1796 case 0x3060: // HIRAGANA LETTER DA 1797 case 0x3062: // HIRAGANA LETTER DI 1798 case 0x3065: // HIRAGANA LETTER DU 1799 case 0x3067: // HIRAGANA LETTER DE 1800 case 0x3069: // HIRAGANA LETTER DO 1801 case 0x3070: // HIRAGANA LETTER BA 1802 case 0x3073: // HIRAGANA LETTER BI 1803 case 0x3076: // HIRAGANA LETTER BU 1804 case 0x3079: // HIRAGANA LETTER BE 1805 case 0x307C: // HIRAGANA LETTER BO 1806 case 0x3094: // HIRAGANA LETTER VU 1807 case 0x30AC: // KATAKANA LETTER GA 1808 case 0x30AE: // KATAKANA LETTER GI 1809 case 0x30B0: // KATAKANA LETTER GU 1810 case 0x30B2: // KATAKANA LETTER GE 1811 case 0x30B4: // KATAKANA LETTER GO 1812 case 0x30B6: // KATAKANA LETTER ZA 1813 case 0x30B8: // KATAKANA LETTER ZI 1814 case 0x30BA: // KATAKANA LETTER ZU 1815 case 0x30BC: // KATAKANA LETTER ZE 1816 case 0x30BE: // KATAKANA LETTER ZO 1817 case 0x30C0: // KATAKANA LETTER DA 1818 case 0x30C2: // KATAKANA LETTER DI 1819 case 0x30C5: // KATAKANA LETTER DU 1820 case 0x30C7: // KATAKANA LETTER DE 1821 case 0x30C9: // KATAKANA LETTER DO 1822 case 0x30D0: // KATAKANA LETTER BA 1823 case 0x30D3: // KATAKANA LETTER BI 1824 case 0x30D6: // KATAKANA LETTER BU 1825 case 0x30D9: // KATAKANA LETTER BE 1826 case 0x30DC: // KATAKANA LETTER BO 1827 case 0x30F4: // KATAKANA LETTER VU 1828 case 0x30F7: // KATAKANA LETTER VA 1829 case 0x30F8: // KATAKANA LETTER VI 1830 case 0x30F9: // KATAKANA LETTER VE 1831 case 0x30FA: // KATAKANA LETTER VO 1832 return VoicedSoundMark; 1833 case 0x3071: // HIRAGANA LETTER PA 1834 case 0x3074: // HIRAGANA LETTER PI 1835 case 0x3077: // HIRAGANA LETTER PU 1836 case 0x307A: // HIRAGANA LETTER PE 1837 case 0x307D: // HIRAGANA LETTER PO 1838 case 0x30D1: // KATAKANA LETTER PA 1839 case 0x30D4: // KATAKANA LETTER PI 1840 case 0x30D7: // KATAKANA LETTER PU 1841 case 0x30DA: // KATAKANA LETTER PE 1842 case 0x30DD: // KATAKANA LETTER PO 1843 return SemiVoicedSoundMark; 1844 } 1845 return NoVoicedSoundMark; 1846} 1847 1848static inline bool isCombiningVoicedSoundMark(UChar character) 1849{ 1850 switch (character) { 1851 case 0x3099: // COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK 1852 case 0x309A: // COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK 1853 return true; 1854 } 1855 return false; 1856} 1857 1858static inline bool containsKanaLetters(const String& pattern) 1859{ 1860 if (pattern.is8Bit()) 1861 return false; 1862 const UChar* characters = pattern.characters16(); 1863 unsigned length = pattern.length(); 1864 for (unsigned i = 0; i < length; ++i) { 1865 if (isKanaLetter(characters[i])) 1866 return true; 1867 } 1868 return false; 1869} 1870 1871static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer) 1872{ 1873 ASSERT(length); 1874 1875 buffer.resize(length); 1876 1877 UErrorCode status = U_ZERO_ERROR; 1878 size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status); 1879 ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR); 1880 ASSERT(bufferSize); 1881 1882 buffer.resize(bufferSize); 1883 1884 if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) 1885 return; 1886 1887 status = U_ZERO_ERROR; 1888 unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status); 1889 ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); 1890} 1891 1892static bool isNonLatin1Separator(UChar32 character) 1893{ 1894 ASSERT_ARG(character, character >= 256); 1895 1896 return U_GET_GC_MASK(character) & (U_GC_S_MASK | U_GC_P_MASK | U_GC_Z_MASK | U_GC_CF_MASK); 1897} 1898 1899static inline bool isSeparator(UChar32 character) 1900{ 1901 static const bool latin1SeparatorTable[256] = { 1902 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1903 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1904 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // space ! " # $ % & ' ( ) * + , - . / 1905 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, // : ; < = > ? 1906 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // @ 1907 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // [ \ ] ^ _ 1908 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // ` 1909 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // { | } ~ 1910 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1911 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1912 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1913 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1914 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1915 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1916 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1917 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0 1918 }; 1919 1920 if (character < 256) 1921 return latin1SeparatorTable[character]; 1922 1923 return isNonLatin1Separator(character); 1924} 1925 1926inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 1927 : m_target(foldQuoteMarks(target)) 1928 , m_targetCharacters(StringView(m_target).upconvertedCharacters()) 1929 , m_options(options) 1930 , m_prefixLength(0) 1931 , m_atBreak(true) 1932 , m_needsMoreContext(options & AtWordStarts) 1933 , m_targetRequiresKanaWorkaround(containsKanaLetters(m_target)) 1934{ 1935 ASSERT(!m_target.isEmpty()); 1936 1937 size_t targetLength = m_target.length(); 1938 m_buffer.reserveInitialCapacity(std::max(targetLength * 8, minimumSearchBufferSize)); 1939 m_overlap = m_buffer.capacity() / 4; 1940 1941 if ((m_options & AtWordStarts) && targetLength) { 1942 UChar32 targetFirstCharacter; 1943 U16_GET(m_target, 0, 0, targetLength, targetFirstCharacter); 1944 // Characters in the separator category never really occur at the beginning of a word, 1945 // so if the target begins with such a character, we just ignore the AtWordStart option. 1946 if (isSeparator(targetFirstCharacter)) { 1947 m_options &= ~AtWordStarts; 1948 m_needsMoreContext = false; 1949 } 1950 } 1951 1952 // Grab the single global searcher. 1953 // If we ever have a reason to do more than once search buffer at once, we'll have 1954 // to move to multiple searchers. 1955 lockSearcher(); 1956 1957 UStringSearch* searcher = WebCore::searcher(); 1958 UCollator* collator = usearch_getCollator(searcher); 1959 1960 UCollationStrength strength; 1961 USearchAttributeValue comparator; 1962 if (m_options & CaseInsensitive) { 1963 // Without loss of generality, have 'e' match {'e', 'E', 'é', 'É'} and 'é' match {'é', 'É'}. 1964 strength = UCOL_SECONDARY; 1965 comparator = USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD; 1966 } else { 1967 // Without loss of generality, have 'e' match {'e'} and 'é' match {'é'}. 1968 strength = UCOL_TERTIARY; 1969 comparator = USEARCH_STANDARD_ELEMENT_COMPARISON; 1970 } 1971 if (ucol_getStrength(collator) != strength) { 1972 ucol_setStrength(collator, strength); 1973 usearch_reset(searcher); 1974 } 1975 1976 UErrorCode status = U_ZERO_ERROR; 1977 usearch_setAttribute(searcher, USEARCH_ELEMENT_COMPARISON, comparator, &status); 1978 ASSERT(status == U_ZERO_ERROR); 1979 1980 usearch_setPattern(searcher, m_targetCharacters, targetLength, &status); 1981 ASSERT(status == U_ZERO_ERROR); 1982 1983 // The kana workaround requires a normalized copy of the target string. 1984 if (m_targetRequiresKanaWorkaround) 1985 normalizeCharacters(m_targetCharacters, targetLength, m_normalizedTarget); 1986} 1987 1988inline SearchBuffer::~SearchBuffer() 1989{ 1990 // Leave the static object pointing to a valid string. 1991 UErrorCode status = U_ZERO_ERROR; 1992 usearch_setPattern(WebCore::searcher(), &newlineCharacter, 1, &status); 1993 ASSERT(status == U_ZERO_ERROR); 1994 usearch_setText(WebCore::searcher(), &newlineCharacter, 1, &status); 1995 ASSERT(status == U_ZERO_ERROR); 1996 1997 unlockSearcher(); 1998} 1999 2000inline size_t SearchBuffer::append(StringView text) 2001{ 2002 ASSERT(text.length()); 2003 2004 if (m_atBreak) { 2005 m_buffer.shrink(0); 2006 m_prefixLength = 0; 2007 m_atBreak = false; 2008 } else if (m_buffer.size() == m_buffer.capacity()) { 2009 memcpy(m_buffer.data(), m_buffer.data() + m_buffer.size() - m_overlap, m_overlap * sizeof(UChar)); 2010 m_prefixLength -= std::min(m_prefixLength, m_buffer.size() - m_overlap); 2011 m_buffer.shrink(m_overlap); 2012 } 2013 2014 size_t oldLength = m_buffer.size(); 2015 size_t usableLength = std::min<size_t>(m_buffer.capacity() - oldLength, text.length()); 2016 ASSERT(usableLength); 2017 m_buffer.grow(oldLength + usableLength); 2018 for (unsigned i = 0; i < usableLength; ++i) 2019 m_buffer[oldLength + i] = foldQuoteMark(text[i]); 2020 return usableLength; 2021} 2022 2023inline bool SearchBuffer::needsMoreContext() const 2024{ 2025 return m_needsMoreContext; 2026} 2027 2028inline void SearchBuffer::prependContext(StringView text) 2029{ 2030 ASSERT(m_needsMoreContext); 2031 ASSERT(m_prefixLength == m_buffer.size()); 2032 2033 if (!text.length()) 2034 return; 2035 2036 m_atBreak = false; 2037 2038 size_t wordBoundaryContextStart = text.length(); 2039 if (wordBoundaryContextStart) { 2040 U16_BACK_1(text, 0, wordBoundaryContextStart); 2041 wordBoundaryContextStart = startOfLastWordBoundaryContext(text.substring(0, wordBoundaryContextStart)); 2042 } 2043 2044 size_t usableLength = std::min(m_buffer.capacity() - m_prefixLength, text.length() - wordBoundaryContextStart); 2045 WTF::append(m_buffer, text.substring(text.length() - usableLength, usableLength)); 2046 m_prefixLength += usableLength; 2047 2048 if (wordBoundaryContextStart || m_prefixLength == m_buffer.capacity()) 2049 m_needsMoreContext = false; 2050} 2051 2052inline bool SearchBuffer::atBreak() const 2053{ 2054 return m_atBreak; 2055} 2056 2057inline void SearchBuffer::reachedBreak() 2058{ 2059 m_atBreak = true; 2060} 2061 2062inline bool SearchBuffer::isBadMatch(const UChar* match, size_t matchLength) const 2063{ 2064 // This function implements the kana workaround. If usearch treats 2065 // it as a match, but we do not want to, then it's a "bad match". 2066 if (!m_targetRequiresKanaWorkaround) 2067 return false; 2068 2069 // Normalize into a match buffer. We reuse a single buffer rather than 2070 // creating a new one each time. 2071 normalizeCharacters(match, matchLength, m_normalizedMatch); 2072 2073 const UChar* a = m_normalizedTarget.begin(); 2074 const UChar* aEnd = m_normalizedTarget.end(); 2075 2076 const UChar* b = m_normalizedMatch.begin(); 2077 const UChar* bEnd = m_normalizedMatch.end(); 2078 2079 while (true) { 2080 // Skip runs of non-kana-letter characters. This is necessary so we can 2081 // correctly handle strings where the target and match have different-length 2082 // runs of characters that match, while still double checking the correctness 2083 // of matches of kana letters with other kana letters. 2084 while (a != aEnd && !isKanaLetter(*a)) 2085 ++a; 2086 while (b != bEnd && !isKanaLetter(*b)) 2087 ++b; 2088 2089 // If we reached the end of either the target or the match, we should have 2090 // reached the end of both; both should have the same number of kana letters. 2091 if (a == aEnd || b == bEnd) { 2092 ASSERT(a == aEnd); 2093 ASSERT(b == bEnd); 2094 return false; 2095 } 2096 2097 // Check for differences in the kana letter character itself. 2098 if (isSmallKanaLetter(*a) != isSmallKanaLetter(*b)) 2099 return true; 2100 if (composedVoicedSoundMark(*a) != composedVoicedSoundMark(*b)) 2101 return true; 2102 ++a; 2103 ++b; 2104 2105 // Check for differences in combining voiced sound marks found after the letter. 2106 while (1) { 2107 if (!(a != aEnd && isCombiningVoicedSoundMark(*a))) { 2108 if (b != bEnd && isCombiningVoicedSoundMark(*b)) 2109 return true; 2110 break; 2111 } 2112 if (!(b != bEnd && isCombiningVoicedSoundMark(*b))) 2113 return true; 2114 if (*a != *b) 2115 return true; 2116 ++a; 2117 ++b; 2118 } 2119 } 2120} 2121 2122inline bool SearchBuffer::isWordEndMatch(size_t start, size_t length) const 2123{ 2124 ASSERT(length); 2125 ASSERT(m_options & AtWordEnds); 2126 2127 int endWord; 2128 // Start searching at the end of matched search, so that multiple word matches succeed. 2129 findEndWordBoundary(StringView(m_buffer.data(), m_buffer.size()), start + length - 1, &endWord); 2130 return static_cast<size_t>(endWord) == (start + length); 2131} 2132 2133inline bool SearchBuffer::isWordStartMatch(size_t start, size_t length) const 2134{ 2135 ASSERT(m_options & AtWordStarts); 2136 2137 if (!start) 2138 return true; 2139 2140 int size = m_buffer.size(); 2141 int offset = start; 2142 UChar32 firstCharacter; 2143 U16_GET(m_buffer.data(), 0, offset, size, firstCharacter); 2144 2145 if (m_options & TreatMedialCapitalAsWordStart) { 2146 UChar32 previousCharacter; 2147 U16_PREV(m_buffer.data(), 0, offset, previousCharacter); 2148 2149 if (isSeparator(firstCharacter)) { 2150 // The start of a separator run is a word start (".org" in "webkit.org"). 2151 if (!isSeparator(previousCharacter)) 2152 return true; 2153 } else if (isASCIIUpper(firstCharacter)) { 2154 // The start of an uppercase run is a word start ("Kit" in "WebKit"). 2155 if (!isASCIIUpper(previousCharacter)) 2156 return true; 2157 // The last character of an uppercase run followed by a non-separator, non-digit 2158 // is a word start ("Request" in "XMLHTTPRequest"). 2159 offset = start; 2160 U16_FWD_1(m_buffer.data(), offset, size); 2161 UChar32 nextCharacter = 0; 2162 if (offset < size) 2163 U16_GET(m_buffer.data(), 0, offset, size, nextCharacter); 2164 if (!isASCIIUpper(nextCharacter) && !isASCIIDigit(nextCharacter) && !isSeparator(nextCharacter)) 2165 return true; 2166 } else if (isASCIIDigit(firstCharacter)) { 2167 // The start of a digit run is a word start ("2" in "WebKit2"). 2168 if (!isASCIIDigit(previousCharacter)) 2169 return true; 2170 } else if (isSeparator(previousCharacter) || isASCIIDigit(previousCharacter)) { 2171 // The start of a non-separator, non-uppercase, non-digit run is a word start, 2172 // except after an uppercase. ("org" in "webkit.org", but not "ore" in "WebCore"). 2173 return true; 2174 } 2175 } 2176 2177 // Chinese and Japanese lack word boundary marks, and there is no clear agreement on what constitutes 2178 // a word, so treat the position before any CJK character as a word start. 2179 if (Font::isCJKIdeographOrSymbol(firstCharacter)) 2180 return true; 2181 2182 size_t wordBreakSearchStart = start + length; 2183 while (wordBreakSearchStart > start) 2184 wordBreakSearchStart = findNextWordFromIndex(StringView(m_buffer.data(), m_buffer.size()), wordBreakSearchStart, false /* backwards */); 2185 return wordBreakSearchStart == start; 2186} 2187 2188inline size_t SearchBuffer::search(size_t& start) 2189{ 2190 size_t size = m_buffer.size(); 2191 if (m_atBreak) { 2192 if (!size) 2193 return 0; 2194 } else { 2195 if (size != m_buffer.capacity()) 2196 return 0; 2197 } 2198 2199 UStringSearch* searcher = WebCore::searcher(); 2200 2201 UErrorCode status = U_ZERO_ERROR; 2202 usearch_setText(searcher, m_buffer.data(), size, &status); 2203 ASSERT(status == U_ZERO_ERROR); 2204 2205 usearch_setOffset(searcher, m_prefixLength, &status); 2206 ASSERT(status == U_ZERO_ERROR); 2207 2208 int matchStart = usearch_next(searcher, &status); 2209 ASSERT(status == U_ZERO_ERROR); 2210 2211nextMatch: 2212 if (!(matchStart >= 0 && static_cast<size_t>(matchStart) < size)) { 2213 ASSERT(matchStart == USEARCH_DONE); 2214 return 0; 2215 } 2216 2217 // Matches that start in the overlap area are only tentative. 2218 // The same match may appear later, matching more characters, 2219 // possibly including a combining character that's not yet in the buffer. 2220 if (!m_atBreak && static_cast<size_t>(matchStart) >= size - m_overlap) { 2221 size_t overlap = m_overlap; 2222 if (m_options & AtWordStarts) { 2223 // Ensure that there is sufficient context before matchStart the next time around for 2224 // determining if it is at a word boundary. 2225 unsigned wordBoundaryContextStart = matchStart; 2226 U16_BACK_1(m_buffer.data(), 0, wordBoundaryContextStart); 2227 wordBoundaryContextStart = startOfLastWordBoundaryContext(StringView(m_buffer.data(), wordBoundaryContextStart)); 2228 overlap = std::min(size - 1, std::max(overlap, size - wordBoundaryContextStart)); 2229 } 2230 memcpy(m_buffer.data(), m_buffer.data() + size - overlap, overlap * sizeof(UChar)); 2231 m_prefixLength -= std::min(m_prefixLength, size - overlap); 2232 m_buffer.shrink(overlap); 2233 return 0; 2234 } 2235 2236 size_t matchedLength = usearch_getMatchedLength(searcher); 2237 ASSERT_WITH_SECURITY_IMPLICATION(matchStart + matchedLength <= size); 2238 2239 // If this match is "bad", move on to the next match. 2240 if (isBadMatch(m_buffer.data() + matchStart, matchedLength) 2241 || ((m_options & AtWordStarts) && !isWordStartMatch(matchStart, matchedLength)) 2242 || ((m_options & AtWordEnds) && !isWordEndMatch(matchStart, matchedLength))) { 2243 matchStart = usearch_next(searcher, &status); 2244 ASSERT(status == U_ZERO_ERROR); 2245 goto nextMatch; 2246 } 2247 2248 size_t newSize = size - (matchStart + 1); 2249 memmove(m_buffer.data(), m_buffer.data() + matchStart + 1, newSize * sizeof(UChar)); 2250 m_prefixLength -= std::min<size_t>(m_prefixLength, matchStart + 1); 2251 m_buffer.shrink(newSize); 2252 2253 start = size - matchStart; 2254 return matchedLength; 2255} 2256 2257#else 2258 2259inline SearchBuffer::SearchBuffer(const String& target, FindOptions options) 2260 : m_target(options & CaseInsensitive ? target.foldCase() : target) 2261 , m_options(options) 2262 , m_buffer(m_target.length()) 2263 , m_isCharacterStartBuffer(m_target.length()) 2264 , m_isBufferFull(false) 2265 , m_cursor(0) 2266{ 2267 ASSERT(!m_target.isEmpty()); 2268 m_target.replace(noBreakSpace, ' '); 2269 foldQuoteMarks(m_target); 2270} 2271 2272inline SearchBuffer::~SearchBuffer() 2273{ 2274} 2275 2276inline void SearchBuffer::reachedBreak() 2277{ 2278 m_cursor = 0; 2279 m_isBufferFull = false; 2280} 2281 2282inline bool SearchBuffer::atBreak() const 2283{ 2284 return !m_cursor && !m_isBufferFull; 2285} 2286 2287inline void SearchBuffer::append(UChar c, bool isStart) 2288{ 2289 m_buffer[m_cursor] = c == noBreakSpace ? ' ' : foldQuoteMark(c); 2290 m_isCharacterStartBuffer[m_cursor] = isStart; 2291 if (++m_cursor == m_target.length()) { 2292 m_cursor = 0; 2293 m_isBufferFull = true; 2294 } 2295} 2296 2297inline size_t SearchBuffer::append(const UChar* characters, size_t length) 2298{ 2299 ASSERT(length); 2300 if (!(m_options & CaseInsensitive)) { 2301 append(characters[0], true); 2302 return 1; 2303 } 2304 const int maxFoldedCharacters = 16; // sensible maximum is 3, this should be more than enough 2305 UChar foldedCharacters[maxFoldedCharacters]; 2306 UErrorCode status = U_ZERO_ERROR; 2307 int numFoldedCharacters = u_strFoldCase(foldedCharacters, maxFoldedCharacters, characters, 1, U_FOLD_CASE_DEFAULT, &status); 2308 ASSERT(U_SUCCESS(status)); 2309 ASSERT(numFoldedCharacters); 2310 ASSERT(numFoldedCharacters <= maxFoldedCharacters); 2311 if (U_SUCCESS(status) && numFoldedCharacters) { 2312 numFoldedCharacters = std::min(numFoldedCharacters, maxFoldedCharacters); 2313 append(foldedCharacters[0], true); 2314 for (int i = 1; i < numFoldedCharacters; ++i) 2315 append(foldedCharacters[i], false); 2316 } 2317 return 1; 2318} 2319 2320inline bool SearchBuffer::needsMoreContext() const 2321{ 2322 return false; 2323} 2324 2325void SearchBuffer::prependContext(const UChar*, size_t) 2326{ 2327 ASSERT_NOT_REACHED(); 2328} 2329 2330inline size_t SearchBuffer::search(size_t& start) 2331{ 2332 if (!m_isBufferFull) 2333 return 0; 2334 if (!m_isCharacterStartBuffer[m_cursor]) 2335 return 0; 2336 2337 size_t tailSpace = m_target.length() - m_cursor; 2338 if (memcmp(&m_buffer[m_cursor], m_target.characters(), tailSpace * sizeof(UChar)) != 0) 2339 return 0; 2340 if (memcmp(&m_buffer[0], m_target.characters() + tailSpace, m_cursor * sizeof(UChar)) != 0) 2341 return 0; 2342 2343 start = length(); 2344 2345 // Now that we've found a match once, we don't want to find it again, because those 2346 // are the SearchBuffer semantics, allowing for a buffer where you append more than one 2347 // character at a time. To do this we take advantage of m_isCharacterStartBuffer, but if 2348 // we want to get rid of that in the future we could track this with a separate boolean 2349 // or even move the characters to the start of the buffer and set m_isBufferFull to false. 2350 m_isCharacterStartBuffer[m_cursor] = false; 2351 2352 return start; 2353} 2354 2355// Returns the number of characters that were appended to the buffer (what we are searching in). 2356// That's not necessarily the same length as the passed-in target string, because case folding 2357// can make two strings match even though they're not the same length. 2358size_t SearchBuffer::length() const 2359{ 2360 size_t bufferSize = m_target.length(); 2361 size_t length = 0; 2362 for (size_t i = 0; i < bufferSize; ++i) 2363 length += m_isCharacterStartBuffer[i]; 2364 return length; 2365} 2366 2367#endif 2368 2369// -------- 2370 2371int TextIterator::rangeLength(const Range* range, bool forSelectionPreservation) 2372{ 2373 unsigned length = 0; 2374 for (TextIterator it(range, forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); !it.atEnd(); it.advance()) 2375 length += it.text().length(); 2376 return length; 2377} 2378 2379PassRefPtr<Range> TextIterator::subrange(Range* entireRange, int characterOffset, int characterCount) 2380{ 2381 CharacterIterator entireRangeIterator(*entireRange); 2382 return characterSubrange(entireRangeIterator, characterOffset, characterCount); 2383} 2384 2385static inline bool isInsideReplacedElement(TextIterator& iterator) 2386{ 2387 ASSERT(!iterator.atEnd()); 2388 ASSERT(iterator.text().length() == 1); 2389 Node* node = iterator.node(); 2390 return node && isRendererReplacedElement(node->renderer()); 2391} 2392 2393PassRefPtr<Range> TextIterator::rangeFromLocationAndLength(ContainerNode* scope, int rangeLocation, int rangeLength, bool forSelectionPreservation) 2394{ 2395 RefPtr<Range> resultRange = scope->document().createRange(); 2396 2397 int docTextPosition = 0; 2398 int rangeEnd = rangeLocation + rangeLength; 2399 bool startRangeFound = false; 2400 2401 RefPtr<Range> textRunRange = rangeOfContents(*scope); 2402 2403 TextIterator it(textRunRange.get(), forSelectionPreservation ? TextIteratorEmitsCharactersBetweenAllVisiblePositions : TextIteratorDefaultBehavior); 2404 2405 // FIXME: the atEnd() check shouldn't be necessary, workaround for <http://bugs.webkit.org/show_bug.cgi?id=6289>. 2406 if (!rangeLocation && !rangeLength && it.atEnd()) { 2407 resultRange->setStart(textRunRange->startContainer(), 0); 2408 resultRange->setEnd(textRunRange->startContainer(), 0); 2409 return resultRange.release(); 2410 } 2411 2412 for (; !it.atEnd(); it.advance()) { 2413 int length = it.text().length(); 2414 textRunRange = it.range(); 2415 2416 bool foundStart = rangeLocation >= docTextPosition && rangeLocation <= docTextPosition + length; 2417 bool foundEnd = rangeEnd >= docTextPosition && rangeEnd <= docTextPosition + length; 2418 2419 if (foundEnd) { 2420 // FIXME: This is a workaround for the fact that the end of a run is often at the wrong 2421 // position for emitted '\n's or if the renderer of the current node is a replaced element. 2422 if (length == 1 && (it.text()[0] == '\n' || isInsideReplacedElement(it))) { 2423 it.advance(); 2424 if (!it.atEnd()) { 2425 RefPtr<Range> range = it.range(); 2426 textRunRange->setEnd(range->startContainer(), range->startOffset()); 2427 } else { 2428 Position runStart = textRunRange->startPosition(); 2429 Position runEnd = VisiblePosition(runStart).next().deepEquivalent(); 2430 if (runEnd.isNotNull()) 2431 textRunRange->setEnd(runEnd.containerNode(), runEnd.computeOffsetInContainerNode()); 2432 } 2433 } 2434 } 2435 2436 if (foundStart) { 2437 startRangeFound = true; 2438 if (textRunRange->startContainer()->isTextNode()) { 2439 int offset = rangeLocation - docTextPosition; 2440 resultRange->setStart(textRunRange->startContainer(), offset + textRunRange->startOffset()); 2441 } else { 2442 if (rangeLocation == docTextPosition) 2443 resultRange->setStart(textRunRange->startContainer(), textRunRange->startOffset()); 2444 else 2445 resultRange->setStart(textRunRange->endContainer(), textRunRange->endOffset()); 2446 } 2447 } 2448 2449 if (foundEnd) { 2450 if (textRunRange->startContainer()->isTextNode()) { 2451 int offset = rangeEnd - docTextPosition; 2452 resultRange->setEnd(textRunRange->startContainer(), offset + textRunRange->startOffset()); 2453 } else { 2454 if (rangeEnd == docTextPosition) 2455 resultRange->setEnd(textRunRange->startContainer(), textRunRange->startOffset()); 2456 else 2457 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset()); 2458 } 2459 docTextPosition += length; 2460 break; 2461 } 2462 2463 docTextPosition += length; 2464 } 2465 2466 if (!startRangeFound) 2467 return nullptr; 2468 2469 if (rangeLength && rangeEnd > docTextPosition) // rangeEnd is out of bounds 2470 resultRange->setEnd(textRunRange->endContainer(), textRunRange->endOffset()); 2471 2472 return resultRange.release(); 2473} 2474 2475bool TextIterator::getLocationAndLengthFromRange(Node* scope, const Range* range, size_t& location, size_t& length) 2476{ 2477 location = notFound; 2478 length = 0; 2479 2480 if (!range->startContainer()) 2481 return false; 2482 2483 // The critical assumption is that this only gets called with ranges that 2484 // concentrate on a given area containing the selection root. This is done 2485 // because of text fields and textareas. The DOM for those is not 2486 // directly in the document DOM, so ensure that the range does not cross a 2487 // boundary of one of those. 2488 if (range->startContainer() != scope && !range->startContainer()->isDescendantOf(scope)) 2489 return false; 2490 if (range->endContainer() != scope && !range->endContainer()->isDescendantOf(scope)) 2491 return false; 2492 2493 RefPtr<Range> testRange = Range::create(scope->document(), scope, 0, range->startContainer(), range->startOffset()); 2494 ASSERT(testRange->startContainer() == scope); 2495 location = TextIterator::rangeLength(testRange.get()); 2496 2497 testRange->setEnd(range->endContainer(), range->endOffset(), IGNORE_EXCEPTION); 2498 ASSERT(testRange->startContainer() == scope); 2499 length = TextIterator::rangeLength(testRange.get()) - location; 2500 return true; 2501} 2502 2503// -------- 2504 2505String plainText(const Range* r, TextIteratorBehavior defaultBehavior, bool isDisplayString) 2506{ 2507 // The initial buffer size can be critical for performance: https://bugs.webkit.org/show_bug.cgi?id=81192 2508 static const unsigned initialCapacity = 1 << 15; 2509 2510 unsigned bufferLength = 0; 2511 StringBuilder builder; 2512 builder.reserveCapacity(initialCapacity); 2513 TextIteratorBehavior behavior = defaultBehavior; 2514 if (!isDisplayString) 2515 behavior = static_cast<TextIteratorBehavior>(behavior | TextIteratorEmitsTextsWithoutTranscoding); 2516 2517 for (TextIterator it(r, behavior); !it.atEnd(); it.advance()) { 2518 it.appendTextToStringBuilder(builder); 2519 bufferLength += it.text().length(); 2520 } 2521 2522 if (!bufferLength) 2523 return emptyString(); 2524 2525 String result = builder.toString(); 2526 2527 if (isDisplayString) 2528 r->ownerDocument().displayStringModifiedByEncoding(result); 2529 2530 return result; 2531} 2532 2533String plainTextReplacingNoBreakSpace(const Range* range, TextIteratorBehavior defaultBehavior, bool isDisplayString) 2534{ 2535 return plainText(range, defaultBehavior, isDisplayString).replace(noBreakSpace, ' '); 2536} 2537 2538static PassRefPtr<Range> collapsedToBoundary(const Range& range, bool forward) 2539{ 2540 RefPtr<Range> result = range.cloneRange(ASSERT_NO_EXCEPTION); 2541 result->collapse(!forward, ASSERT_NO_EXCEPTION); 2542 return result.release(); 2543} 2544 2545static size_t findPlainText(const Range& range, const String& target, FindOptions options, size_t& matchStart) 2546{ 2547 matchStart = 0; 2548 size_t matchLength = 0; 2549 2550 SearchBuffer buffer(target, options); 2551 2552 if (buffer.needsMoreContext()) { 2553 RefPtr<Range> beforeStartRange = range.ownerDocument().createRange(); 2554 beforeStartRange->setEnd(range.startContainer(), range.startOffset()); 2555 for (SimplifiedBackwardsTextIterator backwardsIterator(*beforeStartRange); !backwardsIterator.atEnd(); backwardsIterator.advance()) { 2556 buffer.prependContext(backwardsIterator.text()); 2557 if (!buffer.needsMoreContext()) 2558 break; 2559 } 2560 } 2561 2562 CharacterIterator findIterator(range, TextIteratorEntersTextControls); 2563 2564 while (!findIterator.atEnd()) { 2565 findIterator.advance(buffer.append(findIterator.text())); 2566tryAgain: 2567 size_t matchStartOffset; 2568 if (size_t newMatchLength = buffer.search(matchStartOffset)) { 2569 // Note that we found a match, and where we found it. 2570 size_t lastCharacterInBufferOffset = findIterator.characterOffset(); 2571 ASSERT(lastCharacterInBufferOffset >= matchStartOffset); 2572 matchStart = lastCharacterInBufferOffset - matchStartOffset; 2573 matchLength = newMatchLength; 2574 // If searching forward, stop on the first match. 2575 // If searching backward, don't stop, so we end up with the last match. 2576 if (!(options & Backwards)) 2577 break; 2578 goto tryAgain; 2579 } 2580 if (findIterator.atBreak() && !buffer.atBreak()) { 2581 buffer.reachedBreak(); 2582 goto tryAgain; 2583 } 2584 } 2585 2586 return matchLength; 2587} 2588 2589PassRefPtr<Range> findPlainText(const Range& range, const String& target, FindOptions options) 2590{ 2591 // First, find the text. 2592 size_t matchStart; 2593 size_t matchLength; 2594 { 2595 matchLength = findPlainText(range, target, options, matchStart); 2596 if (!matchLength) 2597 return collapsedToBoundary(range, !(options & Backwards)); 2598 } 2599 2600 // Then, find the document position of the start and the end of the text. 2601 CharacterIterator computeRangeIterator(range, TextIteratorEntersTextControls); 2602 return characterSubrange(computeRangeIterator, matchStart, matchLength); 2603} 2604 2605} 2606