1/* 2 * Copyright (C) 2000 Lars Knoll (knoll@kde.org) 3 * Copyright (C) 2003, 2004, 2006, 2007, 2008 Apple Inc. All right reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22#ifndef BidiResolver_h 23#define BidiResolver_h 24 25#include "BidiContext.h" 26#include "BidiRunList.h" 27#include "TextDirection.h" 28#include <wtf/Noncopyable.h> 29#include <wtf/PassRefPtr.h> 30#include <wtf/Vector.h> 31 32namespace WebCore { 33 34class RenderObject; 35 36template <class Iterator> class MidpointState { 37public: 38 MidpointState() 39 { 40 reset(); 41 } 42 43 void reset() 44 { 45 m_numMidpoints = 0; 46 m_currentMidpoint = 0; 47 m_betweenMidpoints = false; 48 } 49 50 void startIgnoringSpaces(const Iterator& midpoint) 51 { 52 ASSERT(!(m_numMidpoints % 2)); 53 addMidpoint(midpoint); 54 } 55 56 void stopIgnoringSpaces(const Iterator& midpoint) 57 { 58 ASSERT(m_numMidpoints % 2); 59 addMidpoint(midpoint); 60 } 61 62 // When ignoring spaces, this needs to be called for objects that need line boxes such as RenderInlines or 63 // hard line breaks to ensure that they're not ignored. 64 void ensureLineBoxInsideIgnoredSpaces(RenderObject* renderer) 65 { 66 Iterator midpoint(0, renderer, 0); 67 stopIgnoringSpaces(midpoint); 68 startIgnoringSpaces(midpoint); 69 } 70 71 Vector<Iterator>& midpoints() { return m_midpoints; } 72 const unsigned& numMidpoints() const { return m_numMidpoints; } 73 const unsigned& currentMidpoint() const { return m_currentMidpoint; } 74 void incrementCurrentMidpoint() { ++m_currentMidpoint; } 75 void decreaseNumMidpoints() { --m_numMidpoints; } 76 const bool& betweenMidpoints() const { return m_betweenMidpoints; } 77 void setBetweenMidpoints(bool betweenMidpoint) { m_betweenMidpoints = betweenMidpoint; } 78private: 79 // The goal is to reuse the line state across multiple 80 // lines so we just keep an array around for midpoints and never clear it across multiple 81 // lines. We track the number of items and position using the two other variables. 82 Vector<Iterator> m_midpoints; 83 unsigned m_numMidpoints; 84 unsigned m_currentMidpoint; 85 bool m_betweenMidpoints; 86 87 void addMidpoint(const Iterator& midpoint) 88 { 89 if (m_midpoints.size() <= m_numMidpoints) 90 m_midpoints.grow(m_numMidpoints + 10); 91 92 Iterator* midpointsIterator = m_midpoints.data(); 93 midpointsIterator[m_numMidpoints++] = midpoint; 94 } 95}; 96 97// The BidiStatus at a given position (typically the end of a line) can 98// be cached and then used to restart bidi resolution at that position. 99struct BidiStatus { 100 BidiStatus() 101 : eor(U_OTHER_NEUTRAL) 102 , lastStrong(U_OTHER_NEUTRAL) 103 , last(U_OTHER_NEUTRAL) 104 { 105 } 106 107 // Creates a BidiStatus representing a new paragraph root with a default direction. 108 // Uses TextDirection as it only has two possibilities instead of UCharDirection which has at least 19. 109 BidiStatus(TextDirection textDirection, bool isOverride) 110 { 111 UCharDirection direction = textDirection == LTR ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT; 112 eor = lastStrong = last = direction; 113 context = BidiContext::create(textDirection == LTR ? 0 : 1, direction, isOverride); 114 } 115 116 BidiStatus(UCharDirection eorDir, UCharDirection lastStrongDir, UCharDirection lastDir, PassRefPtr<BidiContext> bidiContext) 117 : eor(eorDir) 118 , lastStrong(lastStrongDir) 119 , last(lastDir) 120 , context(bidiContext) 121 { 122 } 123 124 UCharDirection eor; 125 UCharDirection lastStrong; 126 UCharDirection last; 127 RefPtr<BidiContext> context; 128}; 129 130class BidiEmbedding { 131public: 132 BidiEmbedding(UCharDirection direction, BidiEmbeddingSource source) 133 : m_direction(direction) 134 , m_source(source) 135 { 136 } 137 138 UCharDirection direction() const { return m_direction; } 139 BidiEmbeddingSource source() const { return m_source; } 140private: 141 UCharDirection m_direction; 142 BidiEmbeddingSource m_source; 143}; 144 145inline bool operator==(const BidiStatus& status1, const BidiStatus& status2) 146{ 147 return status1.eor == status2.eor && status1.last == status2.last && status1.lastStrong == status2.lastStrong && *(status1.context) == *(status2.context); 148} 149 150inline bool operator!=(const BidiStatus& status1, const BidiStatus& status2) 151{ 152 return !(status1 == status2); 153} 154 155struct BidiCharacterRun { 156 WTF_MAKE_FAST_ALLOCATED; 157public: 158 BidiCharacterRun(int start, int stop, BidiContext* context, UCharDirection direction) 159 : m_override(context->override()) 160 , m_next(0) 161 , m_start(start) 162 , m_stop(stop) 163 { 164 if (direction == U_OTHER_NEUTRAL) 165 direction = context->dir(); 166 167 m_level = context->level(); 168 169 // add level of run (cases I1 & I2) 170 if (m_level % 2) { 171 if (direction == U_LEFT_TO_RIGHT || direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER) 172 m_level++; 173 } else { 174 if (direction == U_RIGHT_TO_LEFT) 175 m_level++; 176 else if (direction == U_ARABIC_NUMBER || direction == U_EUROPEAN_NUMBER) 177 m_level += 2; 178 } 179 } 180 181 int start() const { return m_start; } 182 int stop() const { return m_stop; } 183 unsigned char level() const { return m_level; } 184 bool reversed(bool visuallyOrdered) { return m_level % 2 && !visuallyOrdered; } 185 bool dirOverride(bool visuallyOrdered) { return m_override || visuallyOrdered; } 186 187 BidiCharacterRun* next() const { return m_next; } 188 void setNext(BidiCharacterRun* next) { m_next = next; } 189 190 // Do not add anything apart from bitfields until after m_next. See https://bugs.webkit.org/show_bug.cgi?id=100173 191 bool m_override : 1; 192 bool m_hasHyphen : 1; // Used by BidiRun subclass which is a layering violation but enables us to save 8 bytes per object on 64-bit. 193 unsigned char m_level; 194 BidiCharacterRun* m_next; 195 int m_start; 196 int m_stop; 197}; 198 199enum VisualDirectionOverride { 200 NoVisualOverride, 201 VisualLeftToRightOverride, 202 VisualRightToLeftOverride 203}; 204 205// BidiResolver is WebKit's implementation of the Unicode Bidi Algorithm 206// http://unicode.org/reports/tr9 207template <class Iterator, class Run> class BidiResolver { 208 WTF_MAKE_NONCOPYABLE(BidiResolver); 209public: 210 BidiResolver() 211 : m_direction(U_OTHER_NEUTRAL) 212 , m_reachedEndOfLine(false) 213 , m_emptyRun(true) 214 , m_nestedIsolateCount(0) 215 { 216 } 217 218#ifndef NDEBUG 219 ~BidiResolver(); 220#endif 221 222 const Iterator& position() const { return m_current; } 223 void setPositionIgnoringNestedIsolates(const Iterator& position) { m_current = position; } 224 void setPosition(const Iterator& position, unsigned nestedIsolatedCount) 225 { 226 m_current = position; 227 m_nestedIsolateCount = nestedIsolatedCount; 228 } 229 230 void increment() { m_current.increment(); } 231 232 BidiContext* context() const { return m_status.context.get(); } 233 void setContext(PassRefPtr<BidiContext> c) { m_status.context = c; } 234 235 void setLastDir(UCharDirection lastDir) { m_status.last = lastDir; } 236 void setLastStrongDir(UCharDirection lastStrongDir) { m_status.lastStrong = lastStrongDir; } 237 void setEorDir(UCharDirection eorDir) { m_status.eor = eorDir; } 238 239 UCharDirection dir() const { return m_direction; } 240 void setDir(UCharDirection d) { m_direction = d; } 241 242 const BidiStatus& status() const { return m_status; } 243 void setStatus(const BidiStatus s) { m_status = s; } 244 245 MidpointState<Iterator>& midpointState() { return m_midpointState; } 246 247 // The current algorithm handles nested isolates one layer of nesting at a time. 248 // But when we layout each isolated span, we will walk into (and ignore) all 249 // child isolated spans. 250 void enterIsolate() { m_nestedIsolateCount++; } 251 void exitIsolate() { ASSERT(m_nestedIsolateCount >= 1); m_nestedIsolateCount--; } 252 bool inIsolate() const { return m_nestedIsolateCount; } 253 254 void embed(UCharDirection, BidiEmbeddingSource); 255 bool commitExplicitEmbedding(); 256 257 void createBidiRunsForLine(const Iterator& end, VisualDirectionOverride = NoVisualOverride, bool hardLineBreak = false); 258 259 BidiRunList<Run>& runs() { return m_runs; } 260 261 // FIXME: This used to be part of deleteRuns() but was a layering violation. 262 // It's unclear if this is still needed. 263 void markCurrentRunEmpty() { m_emptyRun = true; } 264 265 Vector<Run*>& isolatedRuns() { return m_isolatedRuns; } 266 267protected: 268 // FIXME: Instead of InlineBidiResolvers subclassing this method, we should 269 // pass in some sort of Traits object which knows how to create runs for appending. 270 void appendRun(); 271 272 Iterator m_current; 273 // sor and eor are "start of run" and "end of run" respectively and correpond 274 // to abreviations used in UBA spec: http://unicode.org/reports/tr9/#BD7 275 Iterator m_sor; // Points to the first character in the current run. 276 Iterator m_eor; // Points to the last character in the current run. 277 Iterator m_last; 278 BidiStatus m_status; 279 UCharDirection m_direction; 280 Iterator endOfLine; 281 bool m_reachedEndOfLine; 282 Iterator m_lastBeforeET; // Before a U_EUROPEAN_NUMBER_TERMINATOR 283 bool m_emptyRun; 284 285 // FIXME: This should not belong to the resolver, but rather be passed 286 // into createBidiRunsForLine by the caller. 287 BidiRunList<Run> m_runs; 288 289 MidpointState<Iterator> m_midpointState; 290 291 unsigned m_nestedIsolateCount; 292 Vector<Run*> m_isolatedRuns; 293 294private: 295 void raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to); 296 void lowerExplicitEmbeddingLevel(UCharDirection from); 297 void checkDirectionInLowerRaiseEmbeddingLevel(); 298 299 void updateStatusLastFromCurrentDirection(UCharDirection); 300 void reorderRunsFromLevels(); 301 302 Vector<BidiEmbedding, 8> m_currentExplicitEmbeddingSequence; 303}; 304 305#ifndef NDEBUG 306template <class Iterator, class Run> 307BidiResolver<Iterator, Run>::~BidiResolver() 308{ 309 // The owner of this resolver should have handled the isolated runs. 310 ASSERT(m_isolatedRuns.isEmpty()); 311} 312#endif 313 314template <class Iterator, class Run> 315void BidiResolver<Iterator, Run>::appendRun() 316{ 317 if (!m_emptyRun && !m_eor.atEnd()) { 318 unsigned startOffset = m_sor.offset(); 319 unsigned endOffset = m_eor.offset(); 320 321 if (!endOfLine.atEnd() && endOffset >= endOfLine.offset()) { 322 m_reachedEndOfLine = true; 323 endOffset = endOfLine.offset(); 324 } 325 326 if (endOffset >= startOffset) 327 m_runs.addRun(new Run(startOffset, endOffset + 1, context(), m_direction)); 328 329 m_eor.increment(); 330 m_sor = m_eor; 331 } 332 333 m_direction = U_OTHER_NEUTRAL; 334 m_status.eor = U_OTHER_NEUTRAL; 335} 336 337template <class Iterator, class Run> 338void BidiResolver<Iterator, Run>::embed(UCharDirection dir, BidiEmbeddingSource source) 339{ 340 // Isolated spans compute base directionality during their own UBA run. 341 // Do not insert fake embed characters once we enter an isolated span. 342 ASSERT(!inIsolate()); 343 344 ASSERT(dir == U_POP_DIRECTIONAL_FORMAT || dir == U_LEFT_TO_RIGHT_EMBEDDING || dir == U_LEFT_TO_RIGHT_OVERRIDE || dir == U_RIGHT_TO_LEFT_EMBEDDING || dir == U_RIGHT_TO_LEFT_OVERRIDE); 345 m_currentExplicitEmbeddingSequence.append(BidiEmbedding(dir, source)); 346} 347 348template <class Iterator, class Run> 349void BidiResolver<Iterator, Run>::checkDirectionInLowerRaiseEmbeddingLevel() 350{ 351 ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd()); 352 ASSERT(m_status.last != U_DIR_NON_SPACING_MARK 353 && m_status.last != U_BOUNDARY_NEUTRAL 354 && m_status.last != U_RIGHT_TO_LEFT_EMBEDDING 355 && m_status.last != U_LEFT_TO_RIGHT_EMBEDDING 356 && m_status.last != U_RIGHT_TO_LEFT_OVERRIDE 357 && m_status.last != U_LEFT_TO_RIGHT_OVERRIDE 358 && m_status.last != U_POP_DIRECTIONAL_FORMAT); 359 if (m_direction == U_OTHER_NEUTRAL) 360 m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT; 361} 362 363template <class Iterator, class Run> 364void BidiResolver<Iterator, Run>::lowerExplicitEmbeddingLevel(UCharDirection from) 365{ 366 if (!m_emptyRun && m_eor != m_last) { 367 checkDirectionInLowerRaiseEmbeddingLevel(); 368 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last 369 if (from == U_LEFT_TO_RIGHT) { 370 // bidi.sor ... bidi.eor ... bidi.last L 371 if (m_status.eor == U_EUROPEAN_NUMBER) { 372 if (m_status.lastStrong != U_LEFT_TO_RIGHT) { 373 m_direction = U_EUROPEAN_NUMBER; 374 appendRun(); 375 } 376 } else if (m_status.eor == U_ARABIC_NUMBER) { 377 m_direction = U_ARABIC_NUMBER; 378 appendRun(); 379 } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) { 380 appendRun(); 381 m_direction = U_LEFT_TO_RIGHT; 382 } 383 } else if (m_status.eor == U_EUROPEAN_NUMBER || m_status.eor == U_ARABIC_NUMBER || m_status.lastStrong == U_LEFT_TO_RIGHT) { 384 appendRun(); 385 m_direction = U_RIGHT_TO_LEFT; 386 } 387 m_eor = m_last; 388 } 389 390 appendRun(); 391 m_emptyRun = true; 392 393 // sor for the new run is determined by the higher level (rule X10) 394 setLastDir(from); 395 setLastStrongDir(from); 396 m_eor = Iterator(); 397} 398 399template <class Iterator, class Run> 400void BidiResolver<Iterator, Run>::raiseExplicitEmbeddingLevel(UCharDirection from, UCharDirection to) 401{ 402 if (!m_emptyRun && m_eor != m_last) { 403 checkDirectionInLowerRaiseEmbeddingLevel(); 404 // bidi.sor ... bidi.eor ... bidi.last eor; need to append the bidi.sor-bidi.eor run or extend it through bidi.last 405 if (to == U_LEFT_TO_RIGHT) { 406 // bidi.sor ... bidi.eor ... bidi.last L 407 if (m_status.eor == U_EUROPEAN_NUMBER) { 408 if (m_status.lastStrong != U_LEFT_TO_RIGHT) { 409 m_direction = U_EUROPEAN_NUMBER; 410 appendRun(); 411 } 412 } else if (m_status.eor == U_ARABIC_NUMBER) { 413 m_direction = U_ARABIC_NUMBER; 414 appendRun(); 415 } else if (m_status.lastStrong != U_LEFT_TO_RIGHT && from == U_LEFT_TO_RIGHT) { 416 appendRun(); 417 m_direction = U_LEFT_TO_RIGHT; 418 } 419 } else if (m_status.eor == U_ARABIC_NUMBER 420 || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong != U_LEFT_TO_RIGHT || from == U_RIGHT_TO_LEFT)) 421 || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && from == U_RIGHT_TO_LEFT)) { 422 appendRun(); 423 m_direction = U_RIGHT_TO_LEFT; 424 } 425 m_eor = m_last; 426 } 427 428 appendRun(); 429 m_emptyRun = true; 430 431 setLastDir(to); 432 setLastStrongDir(to); 433 m_eor = Iterator(); 434} 435 436template <class Iterator, class Run> 437bool BidiResolver<Iterator, Run>::commitExplicitEmbedding() 438{ 439 // When we're "inIsolate()" we're resolving the parent context which 440 // ignores (skips over) the isolated content, including embedding levels. 441 // We should never accrue embedding levels while skipping over isolated content. 442 ASSERT(!inIsolate() || m_currentExplicitEmbeddingSequence.isEmpty()); 443 444 unsigned char fromLevel = context()->level(); 445 RefPtr<BidiContext> toContext = context(); 446 447 for (size_t i = 0; i < m_currentExplicitEmbeddingSequence.size(); ++i) { 448 BidiEmbedding embedding = m_currentExplicitEmbeddingSequence[i]; 449 if (embedding.direction() == U_POP_DIRECTIONAL_FORMAT) { 450 if (BidiContext* parentContext = toContext->parent()) 451 toContext = parentContext; 452 } else { 453 UCharDirection direction = (embedding.direction() == U_RIGHT_TO_LEFT_EMBEDDING || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE) ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT; 454 bool override = embedding.direction() == U_LEFT_TO_RIGHT_OVERRIDE || embedding.direction() == U_RIGHT_TO_LEFT_OVERRIDE; 455 unsigned char level = toContext->level(); 456 if (direction == U_RIGHT_TO_LEFT) 457 level = nextGreaterOddLevel(level); 458 else 459 level = nextGreaterEvenLevel(level); 460 if (level < 61) 461 toContext = BidiContext::create(level, direction, override, embedding.source(), toContext.get()); 462 } 463 } 464 465 unsigned char toLevel = toContext->level(); 466 467 if (toLevel > fromLevel) 468 raiseExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT, toLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT); 469 else if (toLevel < fromLevel) 470 lowerExplicitEmbeddingLevel(fromLevel % 2 ? U_RIGHT_TO_LEFT : U_LEFT_TO_RIGHT); 471 472 setContext(toContext); 473 474 m_currentExplicitEmbeddingSequence.clear(); 475 476 return fromLevel != toLevel; 477} 478 479template <class Iterator, class Run> 480inline void BidiResolver<Iterator, Run>::updateStatusLastFromCurrentDirection(UCharDirection dirCurrent) 481{ 482 switch (dirCurrent) { 483 case U_EUROPEAN_NUMBER_TERMINATOR: 484 if (m_status.last != U_EUROPEAN_NUMBER) 485 m_status.last = U_EUROPEAN_NUMBER_TERMINATOR; 486 break; 487 case U_EUROPEAN_NUMBER_SEPARATOR: 488 case U_COMMON_NUMBER_SEPARATOR: 489 case U_SEGMENT_SEPARATOR: 490 case U_WHITE_SPACE_NEUTRAL: 491 case U_OTHER_NEUTRAL: 492 switch (m_status.last) { 493 case U_LEFT_TO_RIGHT: 494 case U_RIGHT_TO_LEFT: 495 case U_RIGHT_TO_LEFT_ARABIC: 496 case U_EUROPEAN_NUMBER: 497 case U_ARABIC_NUMBER: 498 m_status.last = dirCurrent; 499 break; 500 default: 501 m_status.last = U_OTHER_NEUTRAL; 502 } 503 break; 504 case U_DIR_NON_SPACING_MARK: 505 case U_BOUNDARY_NEUTRAL: 506 case U_RIGHT_TO_LEFT_EMBEDDING: 507 case U_LEFT_TO_RIGHT_EMBEDDING: 508 case U_RIGHT_TO_LEFT_OVERRIDE: 509 case U_LEFT_TO_RIGHT_OVERRIDE: 510 case U_POP_DIRECTIONAL_FORMAT: 511 // ignore these 512 break; 513 case U_EUROPEAN_NUMBER: 514 FALLTHROUGH; 515 default: 516 m_status.last = dirCurrent; 517 } 518} 519 520template <class Iterator, class Run> 521inline void BidiResolver<Iterator, Run>::reorderRunsFromLevels() 522{ 523 unsigned char levelLow = 128; 524 unsigned char levelHigh = 0; 525 for (Run* run = m_runs.firstRun(); run; run = run->next()) { 526 levelHigh = std::max(run->level(), levelHigh); 527 levelLow = std::min(run->level(), levelLow); 528 } 529 530 // This implements reordering of the line (L2 according to Bidi spec): 531 // http://unicode.org/reports/tr9/#L2 532 // L2. From the highest level found in the text to the lowest odd level on each line, 533 // reverse any contiguous sequence of characters that are at that level or higher. 534 535 // Reversing is only done up to the lowest odd level. 536 if (!(levelLow % 2)) 537 levelLow++; 538 539 unsigned count = m_runs.runCount() - 1; 540 541 while (levelHigh >= levelLow) { 542 unsigned i = 0; 543 Run* run = m_runs.firstRun(); 544 while (i < count) { 545 for (;i < count && run && run->level() < levelHigh; i++) 546 run = run->next(); 547 unsigned start = i; 548 for (;i <= count && run && run->level() >= levelHigh; i++) 549 run = run->next(); 550 unsigned end = i - 1; 551 m_runs.reverseRuns(start, end); 552 } 553 levelHigh--; 554 } 555} 556 557template <class Iterator, class Run> 558void BidiResolver<Iterator, Run>::createBidiRunsForLine(const Iterator& end, VisualDirectionOverride override, bool hardLineBreak) 559{ 560 ASSERT(m_direction == U_OTHER_NEUTRAL); 561 562 if (override != NoVisualOverride) { 563 m_emptyRun = false; 564 m_sor = m_current; 565 m_eor = Iterator(); 566 while (m_current != end && !m_current.atEnd()) { 567 m_eor = m_current; 568 increment(); 569 } 570 m_direction = override == VisualLeftToRightOverride ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT; 571 appendRun(); 572 m_runs.setLogicallyLastRun(m_runs.lastRun()); 573 if (override == VisualRightToLeftOverride && m_runs.runCount()) 574 m_runs.reverseRuns(0, m_runs.runCount() - 1); 575 return; 576 } 577 578 m_emptyRun = true; 579 580 m_eor = Iterator(); 581 582 m_last = m_current; 583 bool pastEnd = false; 584 BidiResolver<Iterator, Run> stateAtEnd; 585 586 while (true) { 587 UCharDirection dirCurrent; 588 if (pastEnd && (hardLineBreak || m_current.atEnd())) { 589 BidiContext* c = context(); 590 if (hardLineBreak) { 591 // A deviation from the Unicode Bidi Algorithm in order to match 592 // WinIE and user expectations: hard line breaks reset bidi state 593 // coming from unicode bidi control characters, but not those from 594 // DOM nodes with specified directionality 595 stateAtEnd.setContext(c->copyStackRemovingUnicodeEmbeddingContexts()); 596 597 dirCurrent = stateAtEnd.context()->dir(); 598 stateAtEnd.setEorDir(dirCurrent); 599 stateAtEnd.setLastDir(dirCurrent); 600 stateAtEnd.setLastStrongDir(dirCurrent); 601 } else { 602 while (c->parent()) 603 c = c->parent(); 604 dirCurrent = c->dir(); 605 } 606 } else { 607 dirCurrent = m_current.direction(); 608 if (context()->override() 609 && dirCurrent != U_RIGHT_TO_LEFT_EMBEDDING 610 && dirCurrent != U_LEFT_TO_RIGHT_EMBEDDING 611 && dirCurrent != U_RIGHT_TO_LEFT_OVERRIDE 612 && dirCurrent != U_LEFT_TO_RIGHT_OVERRIDE 613 && dirCurrent != U_POP_DIRECTIONAL_FORMAT) 614 dirCurrent = context()->dir(); 615 else if (dirCurrent == U_DIR_NON_SPACING_MARK) 616 dirCurrent = m_status.last; 617 } 618 619 // We ignore all character directionality while in unicode-bidi: isolate spans. 620 // We'll handle ordering the isolated characters in a second pass. 621 if (inIsolate()) 622 dirCurrent = U_OTHER_NEUTRAL; 623 624 ASSERT(m_status.eor != U_OTHER_NEUTRAL || m_eor.atEnd()); 625 switch (dirCurrent) { 626 627 // embedding and overrides (X1-X9 in the Bidi specs) 628 case U_RIGHT_TO_LEFT_EMBEDDING: 629 case U_LEFT_TO_RIGHT_EMBEDDING: 630 case U_RIGHT_TO_LEFT_OVERRIDE: 631 case U_LEFT_TO_RIGHT_OVERRIDE: 632 case U_POP_DIRECTIONAL_FORMAT: 633 embed(dirCurrent, FromUnicode); 634 commitExplicitEmbedding(); 635 break; 636 637 // strong types 638 case U_LEFT_TO_RIGHT: 639 switch(m_status.last) { 640 case U_RIGHT_TO_LEFT: 641 case U_RIGHT_TO_LEFT_ARABIC: 642 case U_EUROPEAN_NUMBER: 643 case U_ARABIC_NUMBER: 644 if (m_status.last != U_EUROPEAN_NUMBER || m_status.lastStrong != U_LEFT_TO_RIGHT) 645 appendRun(); 646 break; 647 case U_LEFT_TO_RIGHT: 648 break; 649 case U_EUROPEAN_NUMBER_SEPARATOR: 650 case U_EUROPEAN_NUMBER_TERMINATOR: 651 case U_COMMON_NUMBER_SEPARATOR: 652 case U_BOUNDARY_NEUTRAL: 653 case U_BLOCK_SEPARATOR: 654 case U_SEGMENT_SEPARATOR: 655 case U_WHITE_SPACE_NEUTRAL: 656 case U_OTHER_NEUTRAL: 657 if (m_status.eor == U_EUROPEAN_NUMBER) { 658 if (m_status.lastStrong != U_LEFT_TO_RIGHT) { 659 // the numbers need to be on a higher embedding level, so let's close that run 660 m_direction = U_EUROPEAN_NUMBER; 661 appendRun(); 662 if (context()->dir() != U_LEFT_TO_RIGHT) { 663 // the neutrals take the embedding direction, which is R 664 m_eor = m_last; 665 m_direction = U_RIGHT_TO_LEFT; 666 appendRun(); 667 } 668 } 669 } else if (m_status.eor == U_ARABIC_NUMBER) { 670 // Arabic numbers are always on a higher embedding level, so let's close that run 671 m_direction = U_ARABIC_NUMBER; 672 appendRun(); 673 if (context()->dir() != U_LEFT_TO_RIGHT) { 674 // the neutrals take the embedding direction, which is R 675 m_eor = m_last; 676 m_direction = U_RIGHT_TO_LEFT; 677 appendRun(); 678 } 679 } else if (m_status.lastStrong != U_LEFT_TO_RIGHT) { 680 //last stuff takes embedding dir 681 if (context()->dir() == U_RIGHT_TO_LEFT) { 682 m_eor = m_last; 683 m_direction = U_RIGHT_TO_LEFT; 684 } 685 appendRun(); 686 } 687 break; 688 default: 689 break; 690 } 691 m_eor = m_current; 692 m_status.eor = U_LEFT_TO_RIGHT; 693 m_status.lastStrong = U_LEFT_TO_RIGHT; 694 m_direction = U_LEFT_TO_RIGHT; 695 break; 696 case U_RIGHT_TO_LEFT_ARABIC: 697 case U_RIGHT_TO_LEFT: 698 switch (m_status.last) { 699 case U_LEFT_TO_RIGHT: 700 case U_EUROPEAN_NUMBER: 701 case U_ARABIC_NUMBER: 702 appendRun(); 703 FALLTHROUGH; 704 case U_RIGHT_TO_LEFT: 705 case U_RIGHT_TO_LEFT_ARABIC: 706 break; 707 case U_EUROPEAN_NUMBER_SEPARATOR: 708 case U_EUROPEAN_NUMBER_TERMINATOR: 709 case U_COMMON_NUMBER_SEPARATOR: 710 case U_BOUNDARY_NEUTRAL: 711 case U_BLOCK_SEPARATOR: 712 case U_SEGMENT_SEPARATOR: 713 case U_WHITE_SPACE_NEUTRAL: 714 case U_OTHER_NEUTRAL: 715 if (m_status.eor == U_EUROPEAN_NUMBER) { 716 if (m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_LEFT_TO_RIGHT) 717 m_eor = m_last; 718 appendRun(); 719 } else if (m_status.eor == U_ARABIC_NUMBER) 720 appendRun(); 721 else if (m_status.lastStrong == U_LEFT_TO_RIGHT) { 722 if (context()->dir() == U_LEFT_TO_RIGHT) 723 m_eor = m_last; 724 appendRun(); 725 } 726 break; 727 default: 728 break; 729 } 730 m_eor = m_current; 731 m_status.eor = U_RIGHT_TO_LEFT; 732 m_status.lastStrong = dirCurrent; 733 m_direction = U_RIGHT_TO_LEFT; 734 break; 735 736 // weak types: 737 738 case U_EUROPEAN_NUMBER: 739 if (m_status.lastStrong != U_RIGHT_TO_LEFT_ARABIC) { 740 // if last strong was AL change EN to AN 741 switch (m_status.last) { 742 case U_EUROPEAN_NUMBER: 743 case U_LEFT_TO_RIGHT: 744 break; 745 case U_RIGHT_TO_LEFT: 746 case U_RIGHT_TO_LEFT_ARABIC: 747 case U_ARABIC_NUMBER: 748 m_eor = m_last; 749 appendRun(); 750 m_direction = U_EUROPEAN_NUMBER; 751 break; 752 case U_EUROPEAN_NUMBER_SEPARATOR: 753 case U_COMMON_NUMBER_SEPARATOR: 754 if (m_status.eor == U_EUROPEAN_NUMBER) 755 break; 756 FALLTHROUGH; 757 case U_EUROPEAN_NUMBER_TERMINATOR: 758 case U_BOUNDARY_NEUTRAL: 759 case U_BLOCK_SEPARATOR: 760 case U_SEGMENT_SEPARATOR: 761 case U_WHITE_SPACE_NEUTRAL: 762 case U_OTHER_NEUTRAL: 763 if (m_status.eor == U_EUROPEAN_NUMBER) { 764 if (m_status.lastStrong == U_RIGHT_TO_LEFT) { 765 // ENs on both sides behave like Rs, so the neutrals should be R. 766 // Terminate the EN run. 767 appendRun(); 768 // Make an R run. 769 m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last; 770 m_direction = U_RIGHT_TO_LEFT; 771 appendRun(); 772 // Begin a new EN run. 773 m_direction = U_EUROPEAN_NUMBER; 774 } 775 } else if (m_status.eor == U_ARABIC_NUMBER) { 776 // Terminate the AN run. 777 appendRun(); 778 if (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT) { 779 // Make an R run. 780 m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last; 781 m_direction = U_RIGHT_TO_LEFT; 782 appendRun(); 783 // Begin a new EN run. 784 m_direction = U_EUROPEAN_NUMBER; 785 } 786 } else if (m_status.lastStrong == U_RIGHT_TO_LEFT) { 787 // Extend the R run to include the neutrals. 788 m_eor = m_status.last == U_EUROPEAN_NUMBER_TERMINATOR ? m_lastBeforeET : m_last; 789 m_direction = U_RIGHT_TO_LEFT; 790 appendRun(); 791 // Begin a new EN run. 792 m_direction = U_EUROPEAN_NUMBER; 793 } 794 break; 795 default: 796 break; 797 } 798 m_eor = m_current; 799 m_status.eor = U_EUROPEAN_NUMBER; 800 if (m_direction == U_OTHER_NEUTRAL) 801 m_direction = U_LEFT_TO_RIGHT; 802 break; 803 } 804 FALLTHROUGH; 805 case U_ARABIC_NUMBER: 806 dirCurrent = U_ARABIC_NUMBER; 807 switch (m_status.last) { 808 case U_LEFT_TO_RIGHT: 809 if (context()->dir() == U_LEFT_TO_RIGHT) 810 appendRun(); 811 break; 812 case U_ARABIC_NUMBER: 813 break; 814 case U_RIGHT_TO_LEFT: 815 case U_RIGHT_TO_LEFT_ARABIC: 816 case U_EUROPEAN_NUMBER: 817 m_eor = m_last; 818 appendRun(); 819 break; 820 case U_COMMON_NUMBER_SEPARATOR: 821 if (m_status.eor == U_ARABIC_NUMBER) 822 break; 823 FALLTHROUGH; 824 case U_EUROPEAN_NUMBER_SEPARATOR: 825 case U_EUROPEAN_NUMBER_TERMINATOR: 826 case U_BOUNDARY_NEUTRAL: 827 case U_BLOCK_SEPARATOR: 828 case U_SEGMENT_SEPARATOR: 829 case U_WHITE_SPACE_NEUTRAL: 830 case U_OTHER_NEUTRAL: 831 if (m_status.eor == U_ARABIC_NUMBER 832 || (m_status.eor == U_EUROPEAN_NUMBER && (m_status.lastStrong == U_RIGHT_TO_LEFT || context()->dir() == U_RIGHT_TO_LEFT)) 833 || (m_status.eor != U_EUROPEAN_NUMBER && m_status.lastStrong == U_LEFT_TO_RIGHT && context()->dir() == U_RIGHT_TO_LEFT)) { 834 // Terminate the run before the neutrals. 835 appendRun(); 836 // Begin an R run for the neutrals. 837 m_direction = U_RIGHT_TO_LEFT; 838 } else if (m_direction == U_OTHER_NEUTRAL) 839 m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_RIGHT_TO_LEFT; 840 m_eor = m_last; 841 appendRun(); 842 break; 843 default: 844 break; 845 } 846 m_eor = m_current; 847 m_status.eor = U_ARABIC_NUMBER; 848 if (m_direction == U_OTHER_NEUTRAL) 849 m_direction = U_ARABIC_NUMBER; 850 break; 851 case U_EUROPEAN_NUMBER_SEPARATOR: 852 case U_COMMON_NUMBER_SEPARATOR: 853 break; 854 case U_EUROPEAN_NUMBER_TERMINATOR: 855 if (m_status.last == U_EUROPEAN_NUMBER) { 856 dirCurrent = U_EUROPEAN_NUMBER; 857 m_eor = m_current; 858 m_status.eor = dirCurrent; 859 } else if (m_status.last != U_EUROPEAN_NUMBER_TERMINATOR) 860 m_lastBeforeET = m_emptyRun ? m_eor : m_last; 861 break; 862 863 // boundary neutrals should be ignored 864 case U_BOUNDARY_NEUTRAL: 865 if (m_eor == m_last) 866 m_eor = m_current; 867 break; 868 // neutrals 869 case U_BLOCK_SEPARATOR: 870 // FIXME: What do we do with newline and paragraph separators that come to here? 871 break; 872 case U_SEGMENT_SEPARATOR: 873 // FIXME: Implement rule L1. 874 break; 875 case U_WHITE_SPACE_NEUTRAL: 876 break; 877 case U_OTHER_NEUTRAL: 878 break; 879 default: 880 break; 881 } 882 883 if (pastEnd && m_eor == m_current) { 884 if (!m_reachedEndOfLine) { 885 m_eor = endOfLine; 886 switch (m_status.eor) { 887 case U_LEFT_TO_RIGHT: 888 case U_RIGHT_TO_LEFT: 889 case U_ARABIC_NUMBER: 890 m_direction = m_status.eor; 891 break; 892 case U_EUROPEAN_NUMBER: 893 m_direction = m_status.lastStrong == U_LEFT_TO_RIGHT ? U_LEFT_TO_RIGHT : U_EUROPEAN_NUMBER; 894 break; 895 default: 896 ASSERT_NOT_REACHED(); 897 } 898 appendRun(); 899 } 900 m_current = end; 901 m_status = stateAtEnd.m_status; 902 m_sor = stateAtEnd.m_sor; 903 m_eor = stateAtEnd.m_eor; 904 m_last = stateAtEnd.m_last; 905 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine; 906 m_lastBeforeET = stateAtEnd.m_lastBeforeET; 907 m_emptyRun = stateAtEnd.m_emptyRun; 908 m_direction = U_OTHER_NEUTRAL; 909 break; 910 } 911 912 updateStatusLastFromCurrentDirection(dirCurrent); 913 m_last = m_current; 914 915 if (m_emptyRun) { 916 m_sor = m_current; 917 m_emptyRun = false; 918 } 919 920 increment(); 921 if (!m_currentExplicitEmbeddingSequence.isEmpty()) { 922 bool committed = commitExplicitEmbedding(); 923 if (committed && pastEnd) { 924 m_current = end; 925 m_status = stateAtEnd.m_status; 926 m_sor = stateAtEnd.m_sor; 927 m_eor = stateAtEnd.m_eor; 928 m_last = stateAtEnd.m_last; 929 m_reachedEndOfLine = stateAtEnd.m_reachedEndOfLine; 930 m_lastBeforeET = stateAtEnd.m_lastBeforeET; 931 m_emptyRun = stateAtEnd.m_emptyRun; 932 m_direction = U_OTHER_NEUTRAL; 933 break; 934 } 935 } 936 937 if (!pastEnd && (m_current == end || m_current.atEnd())) { 938 if (m_emptyRun) 939 break; 940 stateAtEnd.m_status = m_status; 941 stateAtEnd.m_sor = m_sor; 942 stateAtEnd.m_eor = m_eor; 943 stateAtEnd.m_last = m_last; 944 stateAtEnd.m_reachedEndOfLine = m_reachedEndOfLine; 945 stateAtEnd.m_lastBeforeET = m_lastBeforeET; 946 stateAtEnd.m_emptyRun = m_emptyRun; 947 endOfLine = m_last; 948 pastEnd = true; 949 } 950 } 951 952 m_runs.setLogicallyLastRun(m_runs.lastRun()); 953 reorderRunsFromLevels(); 954 endOfLine = Iterator(); 955} 956 957} // namespace WebCore 958 959#endif // BidiResolver_h 960