1/*
2 * Copyright (C) Research In Motion Limited 2011. All rights reserved.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Library General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Library General Public License for more details.
13 *
14 * You should have received a copy of the GNU Library General Public License
15 * along with this library; see the file COPYING.LIB.  If not, write to
16 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 *
19 */
20
21#ifndef SurrogatePairAwareTextIterator_h
22#define SurrogatePairAwareTextIterator_h
23
24#include <wtf/unicode/CharacterNames.h>
25#include <wtf/text/WTFString.h>
26
27namespace WebCore {
28
29class SurrogatePairAwareTextIterator {
30public:
31    // The passed in UChar pointer starts at 'currentCharacter'. The iterator operatoes on the range [currentCharacter, lastCharacter].
32    // 'endCharacter' denotes the maximum length of the UChar array, which might exceed 'lastCharacter'.
33    SurrogatePairAwareTextIterator(const UChar*, int currentCharacter, int lastCharacter, int endCharacter);
34
35    inline bool consume(UChar32& character, unsigned& clusterLength)
36    {
37        if (m_currentCharacter >= m_lastCharacter)
38            return false;
39
40        character = *m_characters;
41        clusterLength = 1;
42
43        if (character < HiraganaLetterSmallA)
44            return true;
45
46        return consumeSlowCase(character, clusterLength);
47    }
48
49    void advance(unsigned advanceLength)
50    {
51        m_characters += advanceLength;
52        m_currentCharacter += advanceLength;
53    }
54
55    int currentCharacter() const { return m_currentCharacter; }
56    const UChar* characters() const { return m_characters; }
57
58private:
59    bool consumeSlowCase(UChar32&, unsigned&);
60    UChar32 normalizeVoicingMarks();
61
62    const UChar* m_characters;
63    int m_currentCharacter;
64    int m_lastCharacter;
65    int m_endCharacter;
66};
67
68}
69
70#endif
71