/*
 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
 * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */

#ifndef TextBreakIterator_h
#define TextBreakIterator_h

#include <wtf/text/AtomicString.h>
#include <wtf/text/StringView.h>

namespace WebCore {

class TextBreakIterator;

// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.

// This is similar to character break iterator in most cases, but is subject to
// platform UI conventions. One notable example where this can be different
// from character break iterator is Thai prepend characters, see bug 24342.
// Use this for insertion point and selection manipulations.
38TextBreakIterator* cursorMovementIterator(StringView); 39 40TextBreakIterator* wordBreakIterator(StringView); 41TextBreakIterator* sentenceBreakIterator(StringView); 42 43TextBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength); 44void releaseLineBreakIterator(TextBreakIterator*); 45 46int textBreakFirst(TextBreakIterator*); 47int textBreakLast(TextBreakIterator*); 48int textBreakNext(TextBreakIterator*); 49int textBreakPrevious(TextBreakIterator*); 50int textBreakCurrent(TextBreakIterator*); 51int textBreakPreceding(TextBreakIterator*, int); 52int textBreakFollowing(TextBreakIterator*, int); 53bool isTextBreak(TextBreakIterator*, int); 54bool isWordTextBreak(TextBreakIterator*); 55 56const int TextBreakDone = -1; 57 58class LazyLineBreakIterator { 59public: 60 LazyLineBreakIterator() 61 : m_iterator(0) 62 , m_cachedPriorContext(0) 63 , m_cachedPriorContextLength(0) 64 { 65 resetPriorContext(); 66 } 67 68 LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString()) 69 : m_string(string) 70 , m_locale(locale) 71 , m_iterator(0) 72 , m_cachedPriorContext(0) 73 , m_cachedPriorContextLength(0) 74 { 75 resetPriorContext(); 76 } 77 78 ~LazyLineBreakIterator() 79 { 80 if (m_iterator) 81 releaseLineBreakIterator(m_iterator); 82 } 83 84 String string() const { return m_string; } 85 86 UChar lastCharacter() const 87 { 88 COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); 89 return m_priorContext[1]; 90 } 91 UChar secondToLastCharacter() const 92 { 93 COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); 94 return m_priorContext[0]; 95 } 96 void setPriorContext(UChar last, UChar secondToLast) 97 { 98 COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); 99 m_priorContext[0] = secondToLast; 100 m_priorContext[1] = 
last; 101 } 102 void updatePriorContext(UChar last) 103 { 104 COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); 105 m_priorContext[0] = m_priorContext[1]; 106 m_priorContext[1] = last; 107 } 108 void resetPriorContext() 109 { 110 COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); 111 m_priorContext[0] = 0; 112 m_priorContext[1] = 0; 113 } 114 unsigned priorContextLength() const 115 { 116 unsigned priorContextLength = 0; 117 COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length); 118 if (m_priorContext[1]) { 119 ++priorContextLength; 120 if (m_priorContext[0]) 121 ++priorContextLength; 122 } 123 return priorContextLength; 124 } 125 // Obtain text break iterator, possibly previously cached, where this iterator is (or has been) 126 // initialized to use the previously stored string as the primary breaking context and using 127 // previously stored prior context if non-empty. 128 TextBreakIterator* get(unsigned priorContextLength) 129 { 130 ASSERT(priorContextLength <= priorContextCapacity); 131 const UChar* priorContext = priorContextLength ? 
&m_priorContext[priorContextCapacity - priorContextLength] : 0; 132 if (!m_iterator) { 133 m_iterator = acquireLineBreakIterator(m_string, m_locale, priorContext, priorContextLength); 134 m_cachedPriorContext = priorContext; 135 m_cachedPriorContextLength = priorContextLength; 136 } else if (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength) { 137 this->resetStringAndReleaseIterator(m_string, m_locale); 138 return this->get(priorContextLength); 139 } 140 return m_iterator; 141 } 142 void resetStringAndReleaseIterator(String string, const AtomicString& locale) 143 { 144 if (m_iterator) 145 releaseLineBreakIterator(m_iterator); 146 m_string = string; 147 m_locale = locale; 148 m_iterator = 0; 149 m_cachedPriorContext = 0; 150 m_cachedPriorContextLength = 0; 151 } 152 153private: 154 static const unsigned priorContextCapacity = 2; 155 String m_string; 156 AtomicString m_locale; 157 TextBreakIterator* m_iterator; 158 UChar m_priorContext[priorContextCapacity]; 159 const UChar* m_cachedPriorContext; 160 unsigned m_cachedPriorContextLength; 161}; 162 163// Iterates over "extended grapheme clusters", as defined in UAX #29. 164// Note that platform implementations may be less sophisticated - e.g. ICU prior to 165// version 4.0 only supports "legacy grapheme clusters". 166// Use this for general text processing, e.g. string truncation. 167 168class NonSharedCharacterBreakIterator { 169 WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator); 170public: 171 NonSharedCharacterBreakIterator(StringView); 172 ~NonSharedCharacterBreakIterator(); 173 174 operator TextBreakIterator*() const { return m_iterator; } 175 176private: 177 TextBreakIterator* m_iterator; 178}; 179 180// Counts the number of grapheme clusters. A surrogate pair or a sequence 181// of a non-combining character and following combining characters is 182// counted as 1 grapheme cluster. 
183unsigned numGraphemeClusters(const String&); 184// Returns the number of characters which will be less than or equal to 185// the specified grapheme cluster length. 186unsigned numCharactersInGraphemeClusters(const String&, unsigned); 187 188} 189 190#endif 191