1/*
2 * Copyright (C) 2006 Lars Knoll <lars@trolltech.com>
3 * Copyright (C) 2007, 2011, 2012 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB.  If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#ifndef TextBreakIterator_h
23#define TextBreakIterator_h
24
25#include <wtf/text/AtomicString.h>
26#include <wtf/text/StringView.h>
27
28namespace WebCore {
29
30class TextBreakIterator;
31
32// Note: The returned iterator is good only until you get another iterator, with the exception of acquireLineBreakIterator.
33
34// This is similar to character break iterator in most cases, but is subject to
35// platform UI conventions. One notable example where this can be different
36// from character break iterator is Thai prepend characters, see bug 24342.
37// Use this for insertion point and selection manipulations.
TextBreakIterator* cursorMovementIterator(StringView);

// Iterators over word and sentence boundaries of the given text, going by the
// function names; the implementations live outside this header.
// These are not line break iterators, so the returned pointer is good only
// until another iterator is requested.
TextBreakIterator* wordBreakIterator(StringView);
TextBreakIterator* sentenceBreakIterator(StringView);
42
// Line break iterators are the exception to the "good only until you get another
// iterator" rule: they are obtained with acquireLineBreakIterator() and handed back
// with releaseLineBreakIterator() (LazyLineBreakIterator pairs the two calls).
// priorContext / priorContextLength describe a buffer of UChars supplied alongside
// the string; LazyLineBreakIterator::get() passes a null pointer and a length of 0
// when it has no prior context.
// NOTE(review): the prior context is presumably the text immediately preceding the
// string — confirm against the implementation.
TextBreakIterator* acquireLineBreakIterator(StringView, const AtomicString& locale, const UChar* priorContext, unsigned priorContextLength);
void releaseLineBreakIterator(TextBreakIterator*);
45
// Navigation and query functions that operate on a TextBreakIterator obtained from
// one of the functions declared earlier in this header. They are implemented elsewhere.
// NOTE(review): the int results and int arguments are presumably offsets into the
// iterated text — confirm against the implementation.
int textBreakFirst(TextBreakIterator*);
int textBreakLast(TextBreakIterator*);
int textBreakNext(TextBreakIterator*);
int textBreakPrevious(TextBreakIterator*);
int textBreakCurrent(TextBreakIterator*);
int textBreakPreceding(TextBreakIterator*, int);
int textBreakFollowing(TextBreakIterator*, int);
bool isTextBreak(TextBreakIterator*, int);
bool isWordTextBreak(TextBreakIterator*);

// NOTE(review): presumably the sentinel the navigation functions return when there
// is no (further) boundary — confirm against the implementation.
const int TextBreakDone = -1;
57
58class LazyLineBreakIterator {
59public:
60    LazyLineBreakIterator()
61        : m_iterator(0)
62        , m_cachedPriorContext(0)
63        , m_cachedPriorContextLength(0)
64    {
65        resetPriorContext();
66    }
67
68    LazyLineBreakIterator(String string, const AtomicString& locale = AtomicString())
69        : m_string(string)
70        , m_locale(locale)
71        , m_iterator(0)
72        , m_cachedPriorContext(0)
73        , m_cachedPriorContextLength(0)
74    {
75        resetPriorContext();
76    }
77
78    ~LazyLineBreakIterator()
79    {
80        if (m_iterator)
81            releaseLineBreakIterator(m_iterator);
82    }
83
84    String string() const { return m_string; }
85
86    UChar lastCharacter() const
87    {
88        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
89        return m_priorContext[1];
90    }
91    UChar secondToLastCharacter() const
92    {
93        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
94        return m_priorContext[0];
95    }
96    void setPriorContext(UChar last, UChar secondToLast)
97    {
98        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
99        m_priorContext[0] = secondToLast;
100        m_priorContext[1] = last;
101    }
102    void updatePriorContext(UChar last)
103    {
104        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
105        m_priorContext[0] = m_priorContext[1];
106        m_priorContext[1] = last;
107    }
108    void resetPriorContext()
109    {
110        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
111        m_priorContext[0] = 0;
112        m_priorContext[1] = 0;
113    }
114    unsigned priorContextLength() const
115    {
116        unsigned priorContextLength = 0;
117        COMPILE_ASSERT(WTF_ARRAY_LENGTH(m_priorContext) == 2, TextBreakIterator_unexpected_prior_context_length);
118        if (m_priorContext[1]) {
119            ++priorContextLength;
120            if (m_priorContext[0])
121                ++priorContextLength;
122        }
123        return priorContextLength;
124    }
125    // Obtain text break iterator, possibly previously cached, where this iterator is (or has been)
126    // initialized to use the previously stored string as the primary breaking context and using
127    // previously stored prior context if non-empty.
128    TextBreakIterator* get(unsigned priorContextLength)
129    {
130        ASSERT(priorContextLength <= priorContextCapacity);
131        const UChar* priorContext = priorContextLength ? &m_priorContext[priorContextCapacity - priorContextLength] : 0;
132        if (!m_iterator) {
133            m_iterator = acquireLineBreakIterator(m_string, m_locale, priorContext, priorContextLength);
134            m_cachedPriorContext = priorContext;
135            m_cachedPriorContextLength = priorContextLength;
136        } else if (priorContext != m_cachedPriorContext || priorContextLength != m_cachedPriorContextLength) {
137            this->resetStringAndReleaseIterator(m_string, m_locale);
138            return this->get(priorContextLength);
139        }
140        return m_iterator;
141    }
142    void resetStringAndReleaseIterator(String string, const AtomicString& locale)
143    {
144        if (m_iterator)
145            releaseLineBreakIterator(m_iterator);
146        m_string = string;
147        m_locale = locale;
148        m_iterator = 0;
149        m_cachedPriorContext = 0;
150        m_cachedPriorContextLength = 0;
151    }
152
153private:
154    static const unsigned priorContextCapacity = 2;
155    String m_string;
156    AtomicString m_locale;
157    TextBreakIterator* m_iterator;
158    UChar m_priorContext[priorContextCapacity];
159    const UChar* m_cachedPriorContext;
160    unsigned m_cachedPriorContextLength;
161};
162
163// Iterates over "extended grapheme clusters", as defined in UAX #29.
164// Note that platform implementations may be less sophisticated - e.g. ICU prior to
165// version 4.0 only supports "legacy grapheme clusters".
166// Use this for general text processing, e.g. string truncation.
167
// Holds a TextBreakIterator for the text passed to the constructor and exposes it
// through an implicit pointer conversion. Copying is disabled via WTF_MAKE_NONCOPYABLE.
// The constructor and destructor are defined out of line.
// NOTE(review): presumably they obtain and give back the underlying character break
// iterator — confirm against the implementation.
class NonSharedCharacterBreakIterator {
    WTF_MAKE_NONCOPYABLE(NonSharedCharacterBreakIterator);
public:
    NonSharedCharacterBreakIterator(StringView);
    ~NonSharedCharacterBreakIterator();

    // Lets an instance be passed wherever a TextBreakIterator* is expected,
    // such as the textBreak* functions declared in this header.
    operator TextBreakIterator*() const { return m_iterator; }

private:
    // The wrapped iterator returned by the conversion operator.
    TextBreakIterator* m_iterator;
};
179
180// Counts the number of grapheme clusters. A surrogate pair or a sequence
181// of a non-combining character and following combining characters is
182// counted as 1 grapheme cluster.
unsigned numGraphemeClusters(const String&);
// Returns the number of characters which will be less than or equal to
// the specified grapheme cluster length.
// NOTE(review): the unnamed unsigned parameter is presumably that grapheme cluster
// length, i.e. a count of clusters — confirm against the implementation.
unsigned numCharactersInGraphemeClusters(const String&, unsigned);
187
188}
189
190#endif
191