1/*
2    Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
3
4    This library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Library General Public
6    License as published by the Free Software Foundation; either
7    version 2 of the License, or (at your option) any later version.
8
9    This library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Library General Public License for more details.
13
14    You should have received a copy of the GNU Library General Public License
15    along with this library; see the file COPYING.LIB.  If not, write to
16    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17    Boston, MA 02110-1301, USA.
18*/
19
20#ifndef SegmentedString_h
21#define SegmentedString_h
22
23#include <wtf/Deque.h>
24#include <wtf/text/StringBuilder.h>
25#include <wtf/text/TextPosition.h>
26#include <wtf/text/WTFString.h>
27
28namespace WebCore {
29
30class SegmentedString;
31
32class SegmentedSubstring {
33public:
34    SegmentedSubstring()
35        : m_length(0)
36        , m_doNotExcludeLineNumbers(true)
37        , m_is8Bit(false)
38    {
39        m_data.string16Ptr = 0;
40    }
41
42    SegmentedSubstring(const String& str)
43        : m_length(str.length())
44        , m_doNotExcludeLineNumbers(true)
45        , m_string(str)
46    {
47        if (m_length) {
48            if (m_string.is8Bit()) {
49                m_is8Bit = true;
50                m_data.string8Ptr = m_string.characters8();
51            } else {
52                m_is8Bit = false;
53                m_data.string16Ptr = m_string.characters16();
54            }
55        } else
56            m_is8Bit = false;
57    }
58
59    void clear() { m_length = 0; m_data.string16Ptr = 0; m_is8Bit = false;}
60
61    bool is8Bit() { return m_is8Bit; }
62
63    bool excludeLineNumbers() const { return !m_doNotExcludeLineNumbers; }
64    bool doNotExcludeLineNumbers() const { return m_doNotExcludeLineNumbers; }
65
66    void setExcludeLineNumbers() { m_doNotExcludeLineNumbers = false; }
67
68    int numberOfCharactersConsumed() const { return m_string.length() - m_length; }
69
70    void appendTo(StringBuilder& builder) const
71    {
72        int offset = m_string.length() - m_length;
73
74        if (!offset) {
75            if (m_length)
76                builder.append(m_string);
77        } else
78            builder.append(m_string.substring(offset, m_length));
79    }
80
81    UChar getCurrentChar8()
82    {
83        return *m_data.string8Ptr;
84    }
85
86    UChar getCurrentChar16()
87    {
88        return m_data.string16Ptr ? *m_data.string16Ptr : 0;
89    }
90
91    UChar incrementAndGetCurrentChar8()
92    {
93        ASSERT(m_data.string8Ptr);
94        return *++m_data.string8Ptr;
95    }
96
97    UChar incrementAndGetCurrentChar16()
98    {
99        ASSERT(m_data.string16Ptr);
100        return *++m_data.string16Ptr;
101    }
102
103    String currentSubString(unsigned length)
104    {
105        int offset = m_string.length() - m_length;
106        return m_string.substring(offset, length);
107    }
108
109    ALWAYS_INLINE UChar getCurrentChar()
110    {
111        ASSERT(m_length);
112        if (is8Bit())
113            return getCurrentChar8();
114        return getCurrentChar16();
115    }
116
117    ALWAYS_INLINE UChar incrementAndGetCurrentChar()
118    {
119        ASSERT(m_length);
120        if (is8Bit())
121            return incrementAndGetCurrentChar8();
122        return incrementAndGetCurrentChar16();
123    }
124
125public:
126    union {
127        const LChar* string8Ptr;
128        const UChar* string16Ptr;
129    } m_data;
130    int m_length;
131
132private:
133    bool m_doNotExcludeLineNumbers;
134    bool m_is8Bit;
135    String m_string;
136};
137
138class SegmentedString {
139public:
140    SegmentedString()
141        : m_pushedChar1(0)
142        , m_pushedChar2(0)
143        , m_currentChar(0)
144        , m_numberOfCharactersConsumedPriorToCurrentString(0)
145        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
146        , m_currentLine(0)
147        , m_closed(false)
148        , m_empty(true)
149        , m_fastPathFlags(NoFastPath)
150        , m_advanceFunc(&SegmentedString::advanceEmpty)
151        , m_advanceAndUpdateLineNumberFunc(&SegmentedString::advanceEmpty)
152    {
153    }
154
155    SegmentedString(const String& str)
156        : m_pushedChar1(0)
157        , m_pushedChar2(0)
158        , m_currentString(str)
159        , m_currentChar(0)
160        , m_numberOfCharactersConsumedPriorToCurrentString(0)
161        , m_numberOfCharactersConsumedPriorToCurrentLine(0)
162        , m_currentLine(0)
163        , m_closed(false)
164        , m_empty(!str.length())
165        , m_fastPathFlags(NoFastPath)
166    {
167        if (m_currentString.m_length)
168            m_currentChar = m_currentString.getCurrentChar();
169        updateAdvanceFunctionPointers();
170    }
171
172    SegmentedString(const SegmentedString&);
173
174    const SegmentedString& operator=(const SegmentedString&);
175
176    void clear();
177    void close();
178
179    void append(const SegmentedString&);
180    void prepend(const SegmentedString&);
181
182    bool excludeLineNumbers() const { return m_currentString.excludeLineNumbers(); }
183    void setExcludeLineNumbers();
184
185    void push(UChar c)
186    {
187        if (!m_pushedChar1) {
188            m_pushedChar1 = c;
189            m_currentChar = m_pushedChar1 ? m_pushedChar1 : m_currentString.getCurrentChar();
190            updateSlowCaseFunctionPointers();
191        } else {
192            ASSERT(!m_pushedChar2);
193            m_pushedChar2 = c;
194        }
195    }
196
197    bool isEmpty() const { return m_empty; }
198    unsigned length() const;
199
200    bool isClosed() const { return m_closed; }
201
202    enum LookAheadResult {
203        DidNotMatch,
204        DidMatch,
205        NotEnoughCharacters,
206    };
207
208    LookAheadResult lookAhead(const String& string) { return lookAheadInline(string, true); }
209    LookAheadResult lookAheadIgnoringCase(const String& string) { return lookAheadInline(string, false); }
210
211    void advance()
212    {
213        if (m_fastPathFlags & Use8BitAdvance) {
214            ASSERT(!m_pushedChar1);
215            bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
216            m_currentChar = m_currentString.incrementAndGetCurrentChar8();
217
218            if (!haveOneCharacterLeft)
219                return;
220
221            updateSlowCaseFunctionPointers();
222
223            return;
224        }
225
226        (this->*m_advanceFunc)();
227    }
228
229    inline void advanceAndUpdateLineNumber()
230    {
231        if (m_fastPathFlags & Use8BitAdvance) {
232            ASSERT(!m_pushedChar1);
233
234            bool haveNewLine = (m_currentChar == '\n') & !!(m_fastPathFlags & Use8BitAdvanceAndUpdateLineNumbers);
235            bool haveOneCharacterLeft = (--m_currentString.m_length == 1);
236
237            m_currentChar = m_currentString.incrementAndGetCurrentChar8();
238
239            if (!(haveNewLine | haveOneCharacterLeft))
240                return;
241
242            if (haveNewLine) {
243                ++m_currentLine;
244                m_numberOfCharactersConsumedPriorToCurrentLine =  m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
245            }
246
247            if (haveOneCharacterLeft)
248                updateSlowCaseFunctionPointers();
249
250            return;
251        }
252
253        (this->*m_advanceAndUpdateLineNumberFunc)();
254    }
255
256    void advanceAndASSERT(UChar expectedCharacter)
257    {
258        ASSERT_UNUSED(expectedCharacter, currentChar() == expectedCharacter);
259        advance();
260    }
261
262    void advanceAndASSERTIgnoringCase(UChar expectedCharacter)
263    {
264        ASSERT_UNUSED(expectedCharacter, u_foldCase(currentChar(), U_FOLD_CASE_DEFAULT) == u_foldCase(expectedCharacter, U_FOLD_CASE_DEFAULT));
265        advance();
266    }
267
268    void advancePastNonNewline()
269    {
270        ASSERT(currentChar() != '\n');
271        advance();
272    }
273
274    void advancePastNewlineAndUpdateLineNumber()
275    {
276        ASSERT(currentChar() == '\n');
277        if (!m_pushedChar1 && m_currentString.m_length > 1) {
278            int newLineFlag = m_currentString.doNotExcludeLineNumbers();
279            m_currentLine += newLineFlag;
280            if (newLineFlag)
281                m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
282            decrementAndCheckLength();
283            m_currentChar = m_currentString.incrementAndGetCurrentChar();
284            return;
285        }
286        advanceAndUpdateLineNumberSlowCase();
287    }
288
289    // Writes the consumed characters into consumedCharacters, which must
290    // have space for at least |count| characters.
291    void advance(unsigned count, UChar* consumedCharacters);
292
293    bool escaped() const { return m_pushedChar1; }
294
295    int numberOfCharactersConsumed() const
296    {
297        int numberOfPushedCharacters = 0;
298        if (m_pushedChar1) {
299            ++numberOfPushedCharacters;
300            if (m_pushedChar2)
301                ++numberOfPushedCharacters;
302        }
303        return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed() - numberOfPushedCharacters;
304    }
305
306    String toString() const;
307
308    UChar currentChar() const { return m_currentChar; }
309
310    // The method is moderately slow, comparing to currentLine method.
311    OrdinalNumber currentColumn() const;
312    OrdinalNumber currentLine() const;
313    // Sets value of line/column variables. Column is specified indirectly by a parameter columnAftreProlog
314    // which is a value of column that we should get after a prolog (first prologLength characters) has been consumed.
315    void setCurrentPosition(OrdinalNumber line, OrdinalNumber columnAftreProlog, int prologLength);
316
317private:
318    enum FastPathFlags {
319        NoFastPath = 0,
320        Use8BitAdvanceAndUpdateLineNumbers = 1 << 0,
321        Use8BitAdvance = 1 << 1,
322    };
323
324    void append(const SegmentedSubstring&);
325    void prepend(const SegmentedSubstring&);
326
327    void advance8();
328    void advance16();
329    void advanceAndUpdateLineNumber8();
330    void advanceAndUpdateLineNumber16();
331    void advanceSlowCase();
332    void advanceAndUpdateLineNumberSlowCase();
333    void advanceEmpty();
334    void advanceSubstring();
335
336    void updateSlowCaseFunctionPointers();
337
338    void decrementAndCheckLength()
339    {
340        ASSERT(m_currentString.m_length > 1);
341        if (--m_currentString.m_length == 1)
342            updateSlowCaseFunctionPointers();
343    }
344
345    void updateAdvanceFunctionPointers()
346    {
347        if ((m_currentString.m_length > 1) && !m_pushedChar1) {
348            if (m_currentString.is8Bit()) {
349                m_advanceFunc = &SegmentedString::advance8;
350                m_fastPathFlags = Use8BitAdvance;
351                if (m_currentString.doNotExcludeLineNumbers()) {
352                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber8;
353                    m_fastPathFlags |= Use8BitAdvanceAndUpdateLineNumbers;
354                } else
355                    m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance8;
356                return;
357            }
358
359            m_advanceFunc = &SegmentedString::advance16;
360            m_fastPathFlags = NoFastPath;
361            if (m_currentString.doNotExcludeLineNumbers())
362                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceAndUpdateLineNumber16;
363            else
364                m_advanceAndUpdateLineNumberFunc = &SegmentedString::advance16;
365            return;
366        }
367
368        if (!m_currentString.m_length && !isComposite()) {
369            m_advanceFunc = &SegmentedString::advanceEmpty;
370            m_fastPathFlags = NoFastPath;
371            m_advanceAndUpdateLineNumberFunc = &SegmentedString::advanceEmpty;
372        }
373
374        updateSlowCaseFunctionPointers();
375    }
376
377    inline LookAheadResult lookAheadInline(const String& string, bool caseSensitive)
378    {
379        if (!m_pushedChar1 && string.length() <= static_cast<unsigned>(m_currentString.m_length)) {
380            String currentSubstring = m_currentString.currentSubString(string.length());
381            if (currentSubstring.startsWith(string, caseSensitive))
382                return DidMatch;
383            return DidNotMatch;
384        }
385        return lookAheadSlowCase(string, caseSensitive);
386    }
387
388    LookAheadResult lookAheadSlowCase(const String& string, bool caseSensitive)
389    {
390        unsigned count = string.length();
391        if (count > length())
392            return NotEnoughCharacters;
393        UChar* consumedCharacters;
394        String consumedString = String::createUninitialized(count, consumedCharacters);
395        advance(count, consumedCharacters);
396        LookAheadResult result = DidNotMatch;
397        if (consumedString.startsWith(string, caseSensitive))
398            result = DidMatch;
399        prepend(SegmentedString(consumedString));
400        return result;
401    }
402
403    bool isComposite() const { return !m_substrings.isEmpty(); }
404
405    UChar m_pushedChar1;
406    UChar m_pushedChar2;
407    SegmentedSubstring m_currentString;
408    UChar m_currentChar;
409    int m_numberOfCharactersConsumedPriorToCurrentString;
410    int m_numberOfCharactersConsumedPriorToCurrentLine;
411    int m_currentLine;
412    Deque<SegmentedSubstring> m_substrings;
413    bool m_closed;
414    bool m_empty;
415    unsigned char m_fastPathFlags;
416    void (SegmentedString::*m_advanceFunc)();
417    void (SegmentedString::*m_advanceAndUpdateLineNumberFunc)();
418};
419
420}
421
422#endif
423