1/*
2 * Copyright (C) 2000 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc.  All right reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB.  If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#ifndef BidiResolver_h
23#define BidiResolver_h
24
25#include "BidiContext.h"
26#include "BidiRunList.h"
27#include "TextDirection.h"
28#include <wtf/Noncopyable.h>
29#include <wtf/PassRefPtr.h>
30#include <wtf/Vector.h>
31
32namespace WebCore {
33
34class RenderObject;
35
36template <class Iterator> class MidpointState {
37public:
38    MidpointState()
39    {
40        reset();
41    }
42
43    void reset()
44    {
45        m_numMidpoints = 0;
46        m_currentMidpoint = 0;
47        m_betweenMidpoints = false;
48    }
49
50    void startIgnoringSpaces(const Iterator& midpoint)
51    {
52        ASSERT(!(m_numMidpoints % 2));
53        addMidpoint(midpoint);
54    }
55
56    void stopIgnoringSpaces(const Iterator& midpoint)
57    {
58        ASSERT(m_numMidpoints % 2);
59        addMidpoint(midpoint);
60    }
61
62    // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or
63    // hard line breaks to ensure that they're not ignored.
64    void ensureLineBoxInsideIgnoredSpaces(RenderObject* renderer)
65    {
66        Iterator midpoint(0, renderer, 0);
67        stopIgnoringSpaces(midpoint);
68        startIgnoringSpaces(midpoint);
69    }
70
71    Vector<Iterator>& midpoints() { return m_midpoints; }
72    const unsigned& numMidpoints() const { return m_numMidpoints; }
73    const unsigned& currentMidpoint() const { return m_currentMidpoint; }
74    void incrementCurrentMidpoint() { ++m_currentMidpoint; }
75    void decreaseNumMidpoints() { --m_numMidpoints; }
76    const bool& betweenMidpoints() const { return m_betweenMidpoints; }
77    void setBetweenMidpoints(bool betweenMidpoint) { m_betweenMidpoints = betweenMidpoint; }
78private:
79    // The goal is to reuse the line state across multiple
80    // lines so we just keep an array around for midpoints and never clear it across multiple
81    // lines. We track the number of items and position using the two other variables.
82    Vector<Iterator> m_midpoints;
83    unsigned m_numMidpoints;
84    unsigned m_currentMidpoint;
85    bool m_betweenMidpoints;
86
87    void addMidpoint(const Iterator& midpoint)
88    {
89        if (m_midpoints.size() <= m_numMidpoints)
90            m_midpoints.grow(m_numMidpoints + 10);
91
92        Iterator* midpointsIterator = m_midpoints.data();
93        midpointsIterator[m_numMidpoints++] = midpoint;
94    }
95};
96
97// The BidiStatus at a given position (typically the end of a line) can
98// be cached and then used to restart bidi resolution at that position.
99struct BidiStatus {
100    BidiStatus()
101        : eor(U_OTHER_NEUTRAL)
102        , lastStrong(U_OTHER_NEUTRAL)
103        , last(U_OTHER_NEUTRAL)
104    {
105    }
106
107    // Creates a BidiStatus representing a new paragraph root with a default direction.
108    // Uses TextDirection as it only has two possibilities instead of UCharDirection which has at least 19.
109    BidiStatus(TextDirection textDirection, bool isOverride)
110    {
111        UCharDirection direction = textDirection == LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
112        eor = lastStrong = last = direction;
113        context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride);
114    }
115
116    BidiStatus(UCharDirection eorDir, UCharDirection lastStrongDir, UCharDirection lastDir, PassRefPtr<BidiContext> bidiContext)
117        : eor(eorDir)
118        , lastStrong(lastStrongDir)
119        , last(lastDir)
120        , context(bidiContext)
121    {
122    }
123
124    UCharDirection eor;
125    UCharDirection lastStrong;
126    UCharDirection last;
127    RefPtr<BidiContext> context;
128};
129
130class BidiEmbedding {
131public:
132    BidiEmbedding(UCharDirection direction, BidiEmbeddingSource source)
133    : m_direction(direction)
134    , m_source(source)
135    {
136    }
137
138    UCharDirection direction() const { return m_direction; }
139    BidiEmbeddingSource source() const { return m_source; }
140private:
141    UCharDirection m_direction;
142    BidiEmbeddingSource m_source;
143};
144
145inline bool operator==(const BidiStatus& status1, const BidiStatus& status2)
146{
147    return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context);
148}
149
150inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2)
151{
152    return !(status1 == status2);
153}
154
155struct BidiCharacterRun {
156    WTF_MAKE_FAST_ALLOCATED;
157public:
158    BidiCharacterRun(int start, int stop, BidiContext* context, UCharDirection direction)
159        : m_override(context->override())
160        , m_next(0)
161        , m_start(start)
162        , m_stop(stop)
163    {
164        if (direction == U_OTHER_NEUTRAL)
165            direction = context->dir();
166
167        m_level = context->level();
168
169        // add level of run (cases I1 & I2)
170        if (m_level % 2) {
171            if (direction == U_LEFT_TO_RIGHT || direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER)
172                m_level++;
173        } else {
174            if (direction == U_RIGHT_TO_LEFT)
175                m_level++;
176            else if (direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER)
177                m_level += 2;
178        }
179    }
180
181    int start() const { return m_start; }
182    int stop() const { return m_stop; }
183    unsigned char level() const { return m_level; }
184    bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; }
185    bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; }
186
187    BidiCharacterRun* next() const { return m_next; }
188    void setNext(BidiCharacterRun* next) { m_next = next; }
189
190    // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173
191    bool m_override : 1;
192    bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit.
193    unsigned char m_level;
194    BidiCharacterRun* m_next;
195    int m_start;
196    int m_stop;
197};
198
199enum VisualDirectionOverride {
200    NoVisualOverride,
201    VisualLeftToRightOverride,
202    VisualRightToLeftOverride
203};
204
205// BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm
206// http://unicode.org/reports/tr9
207template <class Iterator, class Run> class BidiResolver {
208    WTF_MAKE_NONCOPYABLE(BidiResolver);
209public:
210    BidiResolver()
211        : m_direction(U_OTHER_NEUTRAL)
212        , m_reachedEndOfLine(false)
213        , m_emptyRun(true)
214        , m_nestedIsolateCount(0)
215    {
216    }
217
218#ifndef NDEBUG
219    ~BidiResolver();
220#endif
221
222    const Iterator& position() const { return m_current; }
223    void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; }
224    void setPosition(const Iterator& position, unsigned nestedIsolatedCount)
225    {
226        m_current = position;
227        m_nestedIsolateCount = nestedIsolatedCount;
228    }
229
230    void increment() { m_current.increment(); }
231
232    BidiContext* context() const { return m_status.context.get(); }
233    void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; }
234
235    void setLastDir(UCharDirection lastDir) { m_status.last = lastDir; }
236    void setLastStrongDir(UCharDirection lastStrongDir) { m_status.lastStrong = lastStrongDir; }
237    void setEorDir(UCharDirection eorDir) { m_status.eor = eorDir; }
238
239    UCharDirection dir() const { return m_direction; }
240    void setDir(UCharDirection d) { m_direction = d; }
241
242    const BidiStatus& status() const { return m_status; }
243    void setStatus(const BidiStatus s) { m_status = s; }
244
245    MidpointState<Iterator>& midpointState() { return m_midpointState; }
246
247    // The current algorithm handles nested isolates one layer of nesting at a time.
248    // But when we layout each isolated span, we will walk into (and ignore) all
249    // child isolated spans.
250    void enterIsolate() { m_nestedIsolateCount++; }
251    void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; }
252    bool inIsolate() const { return m_nestedIsolateCount; }
253
254    void embed(UCharDirection, BidiEmbeddingSource);
255    bool commitExplicitEmbedding();
256
257    void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false);
258
259    BidiRunList<Run>& runs() { return m_runs; }
260
261    // FIXME: This used to be part of deleteRuns() but was a layering violation.
262    // It's unclear if this is still needed.
263    void markCurrentRunEmpty() { m_emptyRun = true; }
264
265    Vector<Run*>& isolatedRuns() { return m_isolatedRuns; }
266
267protected:
268    // FIXME: Instead of InlineBidiResolvers subclassing this method, we should
269    // pass in some sort of Traits object which knows how to create runs for appending.
270    void appendRun();
271
272    Iterator m_current;
273    // sor and eor are "start of run" and "end of run" respectively and correpond
274    // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7
275    Iterator m_sor; // Points to the first character in the current run.
276    Iterator m_eor; // Points to the last character in the current run.
277    Iterator m_last;
278    BidiStatus m_status;
279    UCharDirection m_direction;
280    Iterator endOfLine;
281    bool m_reachedEndOfLine;
282    Iterator m_lastBeforeET; // Before a U_EUROPEAN_NUMBER_TERMINATOR
283    bool m_emptyRun;
284
285    // FIXME: This should not belong to the resolver, but rather be passed
286    // into createBidiRunsForLine by the caller.
287    BidiRunList<Run> m_runs;
288
289    MidpointState<Iterator> m_midpointState;
290
291    unsigned m_nestedIsolateCount;
292    Vector<Run*> m_isolatedRuns;
293
294private:
295    void raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to);
296    void lowerExplicitEmbeddingLevel(UCharDirection from);
297    void checkDirectionInLowerRaiseEmbeddingLevel();
298
299    void updateStatusLastFromCurrentDirection(UCharDirection);
300    void reorderRunsFromLevels();
301
302    Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence;
303};
304
305#ifndef NDEBUG
306template <class Iterator, class Run>
307BidiResolver<Iterator, Run>::~BidiResolver()
308{
309    // The owner of this resolver should have handled the isolated runs.
310    ASSERT(m_isolatedRuns.isEmpty());
311}
312#endif
313
314template <class Iterator, class Run>
315void BidiResolver<Iterator, Run>::appendRun()
316{
317    if (!m_emptyRun && !m_eor.atEnd()) {
318        unsigned startOffset = m_sor.offset();
319        unsigned endOffset = m_eor.offset();
320
321        if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) {
322            m_reachedEndOfLine = true;
323            endOffset = endOfLine.offset();
324        }
325
326        if (endOffset >= startOffset)
327            m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction));
328
329        m_eor.increment();
330        m_sor = m_eor;
331    }
332
333    m_direction = U_OTHER_NEUTRAL;
334    m_status.eor = U_OTHER_NEUTRAL;
335}
336
337template <class Iterator, class Run>
338void BidiResolver<Iterator, Run>::embed(UCharDirection dir, BidiEmbeddingSource source)
339{
340    // Isolated spans compute base directionality during their own UBA run.
341    // Do not insert fake embed characters once we enter an isolated span.
342    ASSERT(!inIsolate());
343
344    ASSERT(dir == U_POP_DIRECTIONAL_FORMAT || dir == U_LEFT_TO_RIGHT_EMBEDDING || dir == U_LEFT_TO_RIGHT_OVERRIDE || dir == U_RIGHT_TO_LEFT_EMBEDDING || dir == U_RIGHT_TO_LEFT_OVERRIDE);
345    m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source));
346}
347
348template <class Iterator, class Run>
349void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel()
350{
351    ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
352    ASSERT(m_status.last != U_DIR_NON_SPACING_MARK
353        && m_status.last != U_BOUNDARY_NEUTRAL
354        && m_status.last != U_RIGHT_TO_LEFT_EMBEDDING
355        && m_status.last != U_LEFT_TO_RIGHT_EMBEDDING
356        && m_status.last != U_RIGHT_TO_LEFT_OVERRIDE
357        && m_status.last != U_LEFT_TO_RIGHT_OVERRIDE
358        && m_status.last != U_POP_DIRECTIONAL_FORMAT);
359    if (m_direction == U_OTHER_NEUTRAL)
360        m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
361}
362
363template <class Iterator, class Run>
364void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(UCharDirection from)
365{
366    if (!m_emptyRun && m_eor != m_last) {
367        checkDirectionInLowerRaiseEmbeddingLevel();
368        // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
369        if (from == U_LEFT_TO_RIGHT) {
370            // bidi.sor ... bidi.eor ... bidi.last L
371            if (m_status.eor == U_EUROPEAN_NUMBER) {
372                if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
373                    m_direction = U_EUROPEAN_NUMBER;
374                    appendRun();
375                }
376            } else if (m_status.eor == U_ARABIC_NUMBER) {
377                m_direction = U_ARABIC_NUMBER;
378                appendRun();
379            } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
380                appendRun();
381                m_direction = U_LEFT_TO_RIGHT;
382            }
383        } else if (m_status.eor == U_EUROPEAN_NUMBER || m_status.eor == U_ARABIC_NUMBER || m_status.lastStrong == U_LEFT_TO_RIGHT) {
384            appendRun();
385            m_direction = U_RIGHT_TO_LEFT;
386        }
387        m_eor = m_last;
388    }
389
390    appendRun();
391    m_emptyRun = true;
392
393    // sor for the new run is determined by the higher level (rule X10)
394    setLastDir(from);
395    setLastStrongDir(from);
396    m_eor = Iterator();
397}
398
399template <class Iterator, class Run>
400void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to)
401{
402    if (!m_emptyRun && m_eor != m_last) {
403        checkDirectionInLowerRaiseEmbeddingLevel();
404        // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last
405        if (to == U_LEFT_TO_RIGHT) {
406            // bidi.sor ... bidi.eor ... bidi.last L
407            if (m_status.eor == U_EUROPEAN_NUMBER) {
408                if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
409                    m_direction = U_EUROPEAN_NUMBER;
410                    appendRun();
411                }
412            } else if (m_status.eor == U_ARABIC_NUMBER) {
413                m_direction = U_ARABIC_NUMBER;
414                appendRun();
415            } else if (m_status.lastStrong != U_LEFT_TO_RIGHT && from == U_LEFT_TO_RIGHT) {
416                appendRun();
417                m_direction = U_LEFT_TO_RIGHT;
418            }
419        } else if (m_status.eor == U_ARABIC_NUMBER
420            || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong != U_LEFT_TO_RIGHT || from == U_RIGHT_TO_LEFT))
421            || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && from == U_RIGHT_TO_LEFT)) {
422            appendRun();
423            m_direction = U_RIGHT_TO_LEFT;
424        }
425        m_eor = m_last;
426    }
427
428    appendRun();
429    m_emptyRun = true;
430
431    setLastDir(to);
432    setLastStrongDir(to);
433    m_eor = Iterator();
434}
435
436template <class Iterator, class Run>
437bool BidiResolver<Iterator, Run>::commitExplicitEmbedding()
438{
439    // When we're "inIsolate()" we're resolving the parent context which
440    // ignores (skips over) the isolated content, including embedding levels.
441    // We should never accrue embedding levels while skipping over isolated content.
442    ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty());
443
444    unsigned char fromLevel = context()->level();
445    RefPtr<BidiContext> toContext = context();
446
447    for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) {
448        BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i];
449        if (embedding.direction() == U_POP_DIRECTIONAL_FORMAT) {
450            if (BidiContext* parentContext = toContext->parent())
451                toContext = parentContext;
452        } else {
453            UCharDirection direction = (embedding.direction() == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT;
454            bool override = embedding.direction() == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE;
455            unsigned char level = toContext->level();
456            if (direction == U_RIGHT_TO_LEFT)
457                level = nextGreaterOddLevel(level);
458            else
459                level = nextGreaterEvenLevel(level);
460            if (level < 61)
461                toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get());
462        }
463    }
464
465    unsigned char toLevel = toContext->level();
466
467    if (toLevel > fromLevel)
468        raiseExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT, toLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
469    else if (toLevel < fromLevel)
470        lowerExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT);
471
472    setContext(toContext);
473
474    m_currentExplicitEmbeddingSequence.clear();
475
476    return fromLevel != toLevel;
477}
478
479template <class Iterator, class Run>
480inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent)
481{
482    switch (dirCurrent) {
483    case U_EUROPEAN_NUMBER_TERMINATOR:
484        if (m_status.last != U_EUROPEAN_NUMBER)
485            m_status.last = U_EUROPEAN_NUMBER_TERMINATOR;
486        break;
487    case U_EUROPEAN_NUMBER_SEPARATOR:
488    case U_COMMON_NUMBER_SEPARATOR:
489    case U_SEGMENT_SEPARATOR:
490    case U_WHITE_SPACE_NEUTRAL:
491    case U_OTHER_NEUTRAL:
492        switch (m_status.last) {
493        case U_LEFT_TO_RIGHT:
494        case U_RIGHT_TO_LEFT:
495        case U_RIGHT_TO_LEFT_ARABIC:
496        case U_EUROPEAN_NUMBER:
497        case U_ARABIC_NUMBER:
498            m_status.last = dirCurrent;
499            break;
500        default:
501            m_status.last = U_OTHER_NEUTRAL;
502        }
503        break;
504    case U_DIR_NON_SPACING_MARK:
505    case U_BOUNDARY_NEUTRAL:
506    case U_RIGHT_TO_LEFT_EMBEDDING:
507    case U_LEFT_TO_RIGHT_EMBEDDING:
508    case U_RIGHT_TO_LEFT_OVERRIDE:
509    case U_LEFT_TO_RIGHT_OVERRIDE:
510    case U_POP_DIRECTIONAL_FORMAT:
511        // ignore these
512        break;
513    case U_EUROPEAN_NUMBER:
514        FALLTHROUGH;
515    default:
516        m_status.last = dirCurrent;
517    }
518}
519
520template <class Iterator, class Run>
521inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels()
522{
523    unsigned char levelLow = 128;
524    unsigned char levelHigh = 0;
525    for (Run* run = m_runs.firstRun(); run; run = run->next()) {
526        levelHigh = std::max(run->level(), levelHigh);
527        levelLow = std::min(run->level(), levelLow);
528    }
529
530    // This implements reordering of the line (L2 according to Bidi spec):
531    // http://unicode.org/reports/tr9/#L2
532    // L2. From the highest level found in the text to the lowest odd level on each line,
533    // reverse any contiguous sequence of characters that are at that level or higher.
534
535    // Reversing is only done up to the lowest odd level.
536    if (!(levelLow % 2))
537        levelLow++;
538
539    unsigned count = m_runs.runCount() - 1;
540
541    while (levelHigh >= levelLow) {
542        unsigned i = 0;
543        Run* run = m_runs.firstRun();
544        while (i < count) {
545            for (;i < count && run && run->level() < levelHigh; i++)
546                run = run->next();
547            unsigned start = i;
548            for (;i <= count && run && run->level() >= levelHigh; i++)
549                run = run->next();
550            unsigned end = i - 1;
551            m_runs.reverseRuns(start, end);
552        }
553        levelHigh--;
554    }
555}
556
557template <class Iterator, class Run>
558void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak)
559{
560    ASSERT(m_direction == U_OTHER_NEUTRAL);
561
562    if (override != NoVisualOverride) {
563        m_emptyRun = false;
564        m_sor = m_current;
565        m_eor = Iterator();
566        while (m_current != end && !m_current.atEnd()) {
567            m_eor = m_current;
568            increment();
569        }
570        m_direction = override == VisualLeftToRightOverride ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
571        appendRun();
572        m_runs.setLogicallyLastRun(m_runs.lastRun());
573        if (override == VisualRightToLeftOverride && m_runs.runCount())
574            m_runs.reverseRuns(0, m_runs.runCount() - 1);
575        return;
576    }
577
578    m_emptyRun = true;
579
580    m_eor = Iterator();
581
582    m_last = m_current;
583    bool pastEnd = false;
584    BidiResolver<Iterator, Run> stateAtEnd;
585
586    while (true) {
587        UCharDirection dirCurrent;
588        if (pastEnd && (hardLineBreak || m_current.atEnd())) {
589            BidiContext* c = context();
590            if (hardLineBreak) {
591                // A deviation from the Unicode Bidi Algorithm in order to match
592                // WinIE and user expectations: hard line breaks reset bidi state
593                // coming from unicode bidi control characters, but not those from
594                // DOM nodes with specified directionality
595                stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts());
596
597                dirCurrent = stateAtEnd.context()->dir();
598                stateAtEnd.setEorDir(dirCurrent);
599                stateAtEnd.setLastDir(dirCurrent);
600                stateAtEnd.setLastStrongDir(dirCurrent);
601            } else {
602                while (c->parent())
603                    c = c->parent();
604                dirCurrent = c->dir();
605            }
606        } else {
607            dirCurrent = m_current.direction();
608            if (context()->override()
609                    && dirCurrent != U_RIGHT_TO_LEFT_EMBEDDING
610                    && dirCurrent != U_LEFT_TO_RIGHT_EMBEDDING
611                    && dirCurrent != U_RIGHT_TO_LEFT_OVERRIDE
612                    && dirCurrent != U_LEFT_TO_RIGHT_OVERRIDE
613                    && dirCurrent != U_POP_DIRECTIONAL_FORMAT)
614                dirCurrent = context()->dir();
615            else if (dirCurrent == U_DIR_NON_SPACING_MARK)
616                dirCurrent = m_status.last;
617        }
618
619        // We ignore all character directionality while in unicode-bidi: isolate spans.
620        // We'll handle ordering the isolated characters in a second pass.
621        if (inIsolate())
622            dirCurrent = U_OTHER_NEUTRAL;
623
624        ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd());
625        switch (dirCurrent) {
626
627        // embedding and overrides (X1-X9 in the Bidi specs)
628        case U_RIGHT_TO_LEFT_EMBEDDING:
629        case U_LEFT_TO_RIGHT_EMBEDDING:
630        case U_RIGHT_TO_LEFT_OVERRIDE:
631        case U_LEFT_TO_RIGHT_OVERRIDE:
632        case U_POP_DIRECTIONAL_FORMAT:
633            embed(dirCurrent, FromUnicode);
634            commitExplicitEmbedding();
635            break;
636
637        // strong types
638        case U_LEFT_TO_RIGHT:
639            switch(m_status.last) {
640                case U_RIGHT_TO_LEFT:
641                case U_RIGHT_TO_LEFT_ARABIC:
642                case U_EUROPEAN_NUMBER:
643                case U_ARABIC_NUMBER:
644                    if (m_status.last != U_EUROPEAN_NUMBER || m_status.lastStrong != U_LEFT_TO_RIGHT)
645                        appendRun();
646                    break;
647                case U_LEFT_TO_RIGHT:
648                    break;
649                case U_EUROPEAN_NUMBER_SEPARATOR:
650                case U_EUROPEAN_NUMBER_TERMINATOR:
651                case U_COMMON_NUMBER_SEPARATOR:
652                case U_BOUNDARY_NEUTRAL:
653                case U_BLOCK_SEPARATOR:
654                case U_SEGMENT_SEPARATOR:
655                case U_WHITE_SPACE_NEUTRAL:
656                case U_OTHER_NEUTRAL:
657                    if (m_status.eor == U_EUROPEAN_NUMBER) {
658                        if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
659                            // the numbers need to be on a higher embedding level, so let's close that run
660                            m_direction = U_EUROPEAN_NUMBER;
661                            appendRun();
662                            if (context()->dir() != U_LEFT_TO_RIGHT) {
663                                // the neutrals take the embedding direction, which is R
664                                m_eor = m_last;
665                                m_direction = U_RIGHT_TO_LEFT;
666                                appendRun();
667                            }
668                        }
669                    } else if (m_status.eor == U_ARABIC_NUMBER) {
670                        // Arabic numbers are always on a higher embedding level, so let's close that run
671                        m_direction = U_ARABIC_NUMBER;
672                        appendRun();
673                        if (context()->dir() != U_LEFT_TO_RIGHT) {
674                            // the neutrals take the embedding direction, which is R
675                            m_eor = m_last;
676                            m_direction = U_RIGHT_TO_LEFT;
677                            appendRun();
678                        }
679                    } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) {
680                        //last stuff takes embedding dir
681                        if (context()->dir() == U_RIGHT_TO_LEFT) {
682                            m_eor = m_last;
683                            m_direction = U_RIGHT_TO_LEFT;
684                        }
685                        appendRun();
686                    }
687                    break;
688                default:
689                    break;
690            }
691            m_eor = m_current;
692            m_status.eor = U_LEFT_TO_RIGHT;
693            m_status.lastStrong = U_LEFT_TO_RIGHT;
694            m_direction = U_LEFT_TO_RIGHT;
695            break;
696        case U_RIGHT_TO_LEFT_ARABIC:
697        case U_RIGHT_TO_LEFT:
698            switch (m_status.last) {
699                case U_LEFT_TO_RIGHT:
700                case U_EUROPEAN_NUMBER:
701                case U_ARABIC_NUMBER:
702                    appendRun();
703                    FALLTHROUGH;
704                case U_RIGHT_TO_LEFT:
705                case U_RIGHT_TO_LEFT_ARABIC:
706                    break;
707                case U_EUROPEAN_NUMBER_SEPARATOR:
708                case U_EUROPEAN_NUMBER_TERMINATOR:
709                case U_COMMON_NUMBER_SEPARATOR:
710                case U_BOUNDARY_NEUTRAL:
711                case U_BLOCK_SEPARATOR:
712                case U_SEGMENT_SEPARATOR:
713                case U_WHITE_SPACE_NEUTRAL:
714                case U_OTHER_NEUTRAL:
715                    if (m_status.eor == U_EUROPEAN_NUMBER) {
716                        if (m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_LEFT_TO_RIGHT)
717                            m_eor = m_last;
718                        appendRun();
719                    } else if (m_status.eor == U_ARABIC_NUMBER)
720                        appendRun();
721                    else if (m_status.lastStrong == U_LEFT_TO_RIGHT) {
722                        if (context()->dir() == U_LEFT_TO_RIGHT)
723                            m_eor = m_last;
724                        appendRun();
725                    }
726                    break;
727                default:
728                    break;
729            }
730            m_eor = m_current;
731            m_status.eor = U_RIGHT_TO_LEFT;
732            m_status.lastStrong = dirCurrent;
733            m_direction = U_RIGHT_TO_LEFT;
734            break;
735
736            // weak types:
737
738        case U_EUROPEAN_NUMBER:
739            if (m_status.lastStrong != U_RIGHT_TO_LEFT_ARABIC) {
740                // if last strong was AL change EN to AN
741                switch (m_status.last) {
742                    case U_EUROPEAN_NUMBER:
743                    case U_LEFT_TO_RIGHT:
744                        break;
745                    case U_RIGHT_TO_LEFT:
746                    case U_RIGHT_TO_LEFT_ARABIC:
747                    case U_ARABIC_NUMBER:
748                        m_eor = m_last;
749                        appendRun();
750                        m_direction = U_EUROPEAN_NUMBER;
751                        break;
752                    case U_EUROPEAN_NUMBER_SEPARATOR:
753                    case U_COMMON_NUMBER_SEPARATOR:
754                        if (m_status.eor == U_EUROPEAN_NUMBER)
755                            break;
756                        FALLTHROUGH;
757                    case U_EUROPEAN_NUMBER_TERMINATOR:
758                    case U_BOUNDARY_NEUTRAL:
759                    case U_BLOCK_SEPARATOR:
760                    case U_SEGMENT_SEPARATOR:
761                    case U_WHITE_SPACE_NEUTRAL:
762                    case U_OTHER_NEUTRAL:
763                        if (m_status.eor == U_EUROPEAN_NUMBER) {
764                            if (m_status.lastStrong == U_RIGHT_TO_LEFT) {
765                                // ENs on both sides behave like Rs, so the neutrals should be R.
766                                // Terminate the EN run.
767                                appendRun();
768                                // Make an R run.
769                                m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
770                                m_direction = U_RIGHT_TO_LEFT;
771                                appendRun();
772                                // Begin a new EN run.
773                                m_direction = U_EUROPEAN_NUMBER;
774                            }
775                        } else if (m_status.eor == U_ARABIC_NUMBER) {
776                            // Terminate the AN run.
777                            appendRun();
778                            if (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT) {
779                                // Make an R run.
780                                m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
781                                m_direction = U_RIGHT_TO_LEFT;
782                                appendRun();
783                                // Begin a new EN run.
784                                m_direction = U_EUROPEAN_NUMBER;
785                            }
786                        } else if (m_status.lastStrong == U_RIGHT_TO_LEFT) {
787                            // Extend the R run to include the neutrals.
788                            m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last;
789                            m_direction = U_RIGHT_TO_LEFT;
790                            appendRun();
791                            // Begin a new EN run.
792                            m_direction = U_EUROPEAN_NUMBER;
793                        }
794                        break;
795                    default:
796                        break;
797                }
798                m_eor = m_current;
799                m_status.eor = U_EUROPEAN_NUMBER;
800                if (m_direction == U_OTHER_NEUTRAL)
801                    m_direction = U_LEFT_TO_RIGHT;
802                break;
803            }
804            FALLTHROUGH;
805        case U_ARABIC_NUMBER:
806            dirCurrent = U_ARABIC_NUMBER;
807            switch (m_status.last) {
808                case U_LEFT_TO_RIGHT:
809                    if (context()->dir() == U_LEFT_TO_RIGHT)
810                        appendRun();
811                    break;
812                case U_ARABIC_NUMBER:
813                    break;
814                case U_RIGHT_TO_LEFT:
815                case U_RIGHT_TO_LEFT_ARABIC:
816                case U_EUROPEAN_NUMBER:
817                    m_eor = m_last;
818                    appendRun();
819                    break;
820                case U_COMMON_NUMBER_SEPARATOR:
821                    if (m_status.eor == U_ARABIC_NUMBER)
822                        break;
823                    FALLTHROUGH;
824                case U_EUROPEAN_NUMBER_SEPARATOR:
825                case U_EUROPEAN_NUMBER_TERMINATOR:
826                case U_BOUNDARY_NEUTRAL:
827                case U_BLOCK_SEPARATOR:
828                case U_SEGMENT_SEPARATOR:
829                case U_WHITE_SPACE_NEUTRAL:
830                case U_OTHER_NEUTRAL:
831                    if (m_status.eor == U_ARABIC_NUMBER
832                        || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT))
833                        || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_RIGHT_TO_LEFT)) {
834                        // Terminate the run before the neutrals.
835                        appendRun();
836                        // Begin an R run for the neutrals.
837                        m_direction = U_RIGHT_TO_LEFT;
838                    } else if (m_direction == U_OTHER_NEUTRAL)
839                        m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT;
840                    m_eor = m_last;
841                    appendRun();
842                    break;
843                default:
844                    break;
845            }
846            m_eor = m_current;
847            m_status.eor = U_ARABIC_NUMBER;
848            if (m_direction == U_OTHER_NEUTRAL)
849                m_direction = U_ARABIC_NUMBER;
850            break;
851        case U_EUROPEAN_NUMBER_SEPARATOR:
852        case U_COMMON_NUMBER_SEPARATOR:
853            break;
854        case U_EUROPEAN_NUMBER_TERMINATOR:
855            if (m_status.last == U_EUROPEAN_NUMBER) {
856                dirCurrent = U_EUROPEAN_NUMBER;
857                m_eor = m_current;
858                m_status.eor = dirCurrent;
859            } else if (m_status.last != U_EUROPEAN_NUMBER_TERMINATOR)
860                m_lastBeforeET = m_emptyRun ? m_eor : m_last;
861            break;
862
863        // boundary neutrals should be ignored
864        case U_BOUNDARY_NEUTRAL:
865            if (m_eor == m_last)
866                m_eor = m_current;
867            break;
868            // neutrals
869        case U_BLOCK_SEPARATOR:
870            // FIXME: What do we do with newline and paragraph separators that come to here?
871            break;
872        case U_SEGMENT_SEPARATOR:
873            // FIXME: Implement rule L1.
874            break;
875        case U_WHITE_SPACE_NEUTRAL:
876            break;
877        case U_OTHER_NEUTRAL:
878            break;
879        default:
880            break;
881        }
882
883        if (pastEnd && m_eor == m_current) {
884            if (!m_reachedEndOfLine) {
885                m_eor = endOfLine;
886                switch (m_status.eor) {
887                    case U_LEFT_TO_RIGHT:
888                    case U_RIGHT_TO_LEFT:
889                    case U_ARABIC_NUMBER:
890                        m_direction = m_status.eor;
891                        break;
892                    case U_EUROPEAN_NUMBER:
893                        m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_EUROPEAN_NUMBER;
894                        break;
895                    default:
896                        ASSERT_NOT_REACHED();
897                }
898                appendRun();
899            }
900            m_current = end;
901            m_status = stateAtEnd.m_status;
902            m_sor = stateAtEnd.m_sor;
903            m_eor = stateAtEnd.m_eor;
904            m_last = stateAtEnd.m_last;
905            m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
906            m_lastBeforeET = stateAtEnd.m_lastBeforeET;
907            m_emptyRun = stateAtEnd.m_emptyRun;
908            m_direction = U_OTHER_NEUTRAL;
909            break;
910        }
911
912        updateStatusLastFromCurrentDirection(dirCurrent);
913        m_last = m_current;
914
915        if (m_emptyRun) {
916            m_sor = m_current;
917            m_emptyRun = false;
918        }
919
920        increment();
921        if (!m_currentExplicitEmbeddingSequence.isEmpty()) {
922            bool committed = commitExplicitEmbedding();
923            if (committed && pastEnd) {
924                m_current = end;
925                m_status = stateAtEnd.m_status;
926                m_sor = stateAtEnd.m_sor;
927                m_eor = stateAtEnd.m_eor;
928                m_last = stateAtEnd.m_last;
929                m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine;
930                m_lastBeforeET = stateAtEnd.m_lastBeforeET;
931                m_emptyRun = stateAtEnd.m_emptyRun;
932                m_direction = U_OTHER_NEUTRAL;
933                break;
934            }
935        }
936
937        if (!pastEnd && (m_current == end || m_current.atEnd())) {
938            if (m_emptyRun)
939                break;
940            stateAtEnd.m_status = m_status;
941            stateAtEnd.m_sor = m_sor;
942            stateAtEnd.m_eor = m_eor;
943            stateAtEnd.m_last = m_last;
944            stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine;
945            stateAtEnd.m_lastBeforeET = m_lastBeforeET;
946            stateAtEnd.m_emptyRun = m_emptyRun;
947            endOfLine = m_last;
948            pastEnd = true;
949        }
950    }
951
952    m_runs.setLogicallyLastRun(m_runs.lastRun());
953    reorderRunsFromLevels();
954    endOfLine = Iterator();
955}
956
957} // namespace WebCore
958
959#endif // BidiResolver_h
960