// CollationElementIterator.java revision 12745:f068a4ffddd2
1/* 2 * Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26/* 27 * (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved 28 * (C) Copyright IBM Corp. 1996-1998 - All Rights Reserved 29 * 30 * The original version of this source code and documentation is copyrighted 31 * and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These 32 * materials are provided under terms of a License Agreement between Taligent 33 * and Sun. This technology is protected by multiple US and International 34 * patents. This notice and attribution to Taligent may not be removed. 35 * Taligent is a registered trademark of Taligent, Inc. 
36 * 37 */ 38 39package java.text; 40 41import java.lang.Character; 42import java.util.Vector; 43import sun.text.CollatorUtilities; 44import sun.text.normalizer.NormalizerBase; 45 46/** 47 * The <code>CollationElementIterator</code> class is used as an iterator 48 * to walk through each character of an international string. Use the iterator 49 * to return the ordering priority of the positioned character. The ordering 50 * priority of a character, which we refer to as a key, defines how a character 51 * is collated in the given collation object. 52 * 53 * <p> 54 * For example, consider the following in Spanish: 55 * <blockquote> 56 * <pre> 57 * "ca" → the first key is key('c') and second key is key('a'). 58 * "cha" → the first key is key('ch') and second key is key('a'). 59 * </pre> 60 * </blockquote> 61 * And in German, 62 * <blockquote> 63 * <pre> 64 * "\u00e4b" → the first key is key('a'), the second key is key('e'), and 65 * the third key is key('b'). 66 * </pre> 67 * </blockquote> 68 * The key of a character is an integer composed of primary order(short), 69 * secondary order(byte), and tertiary order(byte). Java strictly defines 70 * the size and signedness of its primitive data types. Therefore, the static 71 * functions <code>primaryOrder</code>, <code>secondaryOrder</code>, and 72 * <code>tertiaryOrder</code> return <code>int</code>, <code>short</code>, 73 * and <code>short</code> respectively to ensure the correctness of the key 74 * value. 
75 * 76 * <p> 77 * Example of the iterator usage, 78 * <blockquote> 79 * <pre> 80 * 81 * String testString = "This is a test"; 82 * Collator col = Collator.getInstance(); 83 * if (col instanceof RuleBasedCollator) { 84 * RuleBasedCollator ruleBasedCollator = (RuleBasedCollator)col; 85 * CollationElementIterator collationElementIterator = ruleBasedCollator.getCollationElementIterator(testString); 86 * int primaryOrder = CollationElementIterator.primaryOrder(collationElementIterator.next()); 87 * : 88 * } 89 * </pre> 90 * </blockquote> 91 * 92 * <p> 93 * <code>CollationElementIterator.next</code> returns the collation order 94 * of the next character. A collation order consists of primary order, 95 * secondary order and tertiary order. The data type of the collation 96 * order is <strong>int</strong>. The first 16 bits of a collation order 97 * is its primary order; the next 8 bits is the secondary order and the 98 * last 8 bits is the tertiary order. 99 * 100 * <p><b>Note:</b> <code>CollationElementIterator</code> is a part of 101 * <code>RuleBasedCollator</code> implementation. It is only usable 102 * with <code>RuleBasedCollator</code> instances. 103 * 104 * @see Collator 105 * @see RuleBasedCollator 106 * @author Helena Shih, Laura Werner, Richard Gillam 107 */ 108public final class CollationElementIterator 109{ 110 /** 111 * Null order which indicates the end of string is reached by the 112 * cursor. 113 */ 114 public static final int NULLORDER = 0xffffffff; 115 116 /** 117 * CollationElementIterator constructor. This takes the source string and 118 * the collation object. The cursor will walk thru the source string based 119 * on the predefined collation rules. If the source string is empty, 120 * NULLORDER will be returned on the calls to next(). 121 * @param sourceText the source string. 122 * @param owner the collation object. 
123 */ 124 CollationElementIterator(String sourceText, RuleBasedCollator owner) { 125 this.owner = owner; 126 ordering = owner.getTables(); 127 if ( sourceText.length() != 0 ) { 128 NormalizerBase.Mode mode = 129 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 130 text = new NormalizerBase(sourceText, mode); 131 } 132 } 133 134 /** 135 * CollationElementIterator constructor. This takes the source string and 136 * the collation object. The cursor will walk thru the source string based 137 * on the predefined collation rules. If the source string is empty, 138 * NULLORDER will be returned on the calls to next(). 139 * @param sourceText the source string. 140 * @param owner the collation object. 141 */ 142 CollationElementIterator(CharacterIterator sourceText, RuleBasedCollator owner) { 143 this.owner = owner; 144 ordering = owner.getTables(); 145 NormalizerBase.Mode mode = 146 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 147 text = new NormalizerBase(sourceText, mode); 148 } 149 150 /** 151 * Resets the cursor to the beginning of the string. The next call 152 * to next() will return the first collation element in the string. 153 */ 154 public void reset() 155 { 156 if (text != null) { 157 text.reset(); 158 NormalizerBase.Mode mode = 159 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 160 text.setMode(mode); 161 } 162 buffer = null; 163 expIndex = 0; 164 swapOrder = 0; 165 } 166 167 /** 168 * Get the next collation element in the string. <p>This iterator iterates 169 * over a sequence of collation elements that were built from the string. 
170 * Because there isn't necessarily a one-to-one mapping from characters to 171 * collation elements, this doesn't mean the same thing as "return the 172 * collation element [or ordering priority] of the next character in the 173 * string".</p> 174 * <p>This function returns the collation element that the iterator is currently 175 * pointing to and then updates the internal pointer to point to the next element. 176 * previous() updates the pointer first and then returns the element. This 177 * means that when you change direction while iterating (i.e., call next() and 178 * then call previous(), or call previous() and then call next()), you'll get 179 * back the same element twice.</p> 180 * 181 * @return the next collation element 182 */ 183 public int next() 184 { 185 if (text == null) { 186 return NULLORDER; 187 } 188 NormalizerBase.Mode textMode = text.getMode(); 189 // convert the owner's mode to something the Normalizer understands 190 NormalizerBase.Mode ownerMode = 191 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 192 if (textMode != ownerMode) { 193 text.setMode(ownerMode); 194 } 195 196 // if buffer contains any decomposed char values 197 // return their strength orders before continuing in 198 // the Normalizer's CharacterIterator. 199 if (buffer != null) { 200 if (expIndex < buffer.length) { 201 return strengthOrder(buffer[expIndex++]); 202 } else { 203 buffer = null; 204 expIndex = 0; 205 } 206 } else if (swapOrder != 0) { 207 if (Character.isSupplementaryCodePoint(swapOrder)) { 208 char[] chars = Character.toChars(swapOrder); 209 swapOrder = chars[1]; 210 return chars[0] << 16; 211 } 212 int order = swapOrder << 16; 213 swapOrder = 0; 214 return order; 215 } 216 int ch = text.next(); 217 218 // are we at the end of Normalizer's text? 
219 if (ch == NormalizerBase.DONE) { 220 return NULLORDER; 221 } 222 223 int value = ordering.getUnicodeOrder(ch); 224 if (value == RuleBasedCollator.UNMAPPED) { 225 swapOrder = ch; 226 return UNMAPPEDCHARVALUE; 227 } 228 else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) { 229 value = nextContractChar(ch); 230 } 231 if (value >= RuleBasedCollator.EXPANDCHARINDEX) { 232 buffer = ordering.getExpandValueList(value); 233 expIndex = 0; 234 value = buffer[expIndex++]; 235 } 236 237 if (ordering.isSEAsianSwapping()) { 238 int consonant; 239 if (isThaiPreVowel(ch)) { 240 consonant = text.next(); 241 if (isThaiBaseConsonant(consonant)) { 242 buffer = makeReorderedBuffer(consonant, value, buffer, true); 243 value = buffer[0]; 244 expIndex = 1; 245 } else if (consonant != NormalizerBase.DONE) { 246 text.previous(); 247 } 248 } 249 if (isLaoPreVowel(ch)) { 250 consonant = text.next(); 251 if (isLaoBaseConsonant(consonant)) { 252 buffer = makeReorderedBuffer(consonant, value, buffer, true); 253 value = buffer[0]; 254 expIndex = 1; 255 } else if (consonant != NormalizerBase.DONE) { 256 text.previous(); 257 } 258 } 259 } 260 261 return strengthOrder(value); 262 } 263 264 /** 265 * Get the previous collation element in the string. <p>This iterator iterates 266 * over a sequence of collation elements that were built from the string. 267 * Because there isn't necessarily a one-to-one mapping from characters to 268 * collation elements, this doesn't mean the same thing as "return the 269 * collation element [or ordering priority] of the previous character in the 270 * string".</p> 271 * <p>This function updates the iterator's internal pointer to point to the 272 * collation element preceding the one it's currently pointing to and then 273 * returns that element, while next() returns the current element and then 274 * updates the pointer. 
This means that when you change direction while 275 * iterating (i.e., call next() and then call previous(), or call previous() 276 * and then call next()), you'll get back the same element twice.</p> 277 * 278 * @return the previous collation element 279 * @since 1.2 280 */ 281 public int previous() 282 { 283 if (text == null) { 284 return NULLORDER; 285 } 286 NormalizerBase.Mode textMode = text.getMode(); 287 // convert the owner's mode to something the Normalizer understands 288 NormalizerBase.Mode ownerMode = 289 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 290 if (textMode != ownerMode) { 291 text.setMode(ownerMode); 292 } 293 if (buffer != null) { 294 if (expIndex > 0) { 295 return strengthOrder(buffer[--expIndex]); 296 } else { 297 buffer = null; 298 expIndex = 0; 299 } 300 } else if (swapOrder != 0) { 301 if (Character.isSupplementaryCodePoint(swapOrder)) { 302 char[] chars = Character.toChars(swapOrder); 303 swapOrder = chars[1]; 304 return chars[0] << 16; 305 } 306 int order = swapOrder << 16; 307 swapOrder = 0; 308 return order; 309 } 310 int ch = text.previous(); 311 if (ch == NormalizerBase.DONE) { 312 return NULLORDER; 313 } 314 315 int value = ordering.getUnicodeOrder(ch); 316 317 if (value == RuleBasedCollator.UNMAPPED) { 318 swapOrder = UNMAPPEDCHARVALUE; 319 return ch; 320 } else if (value >= RuleBasedCollator.CONTRACTCHARINDEX) { 321 value = prevContractChar(ch); 322 } 323 if (value >= RuleBasedCollator.EXPANDCHARINDEX) { 324 buffer = ordering.getExpandValueList(value); 325 expIndex = buffer.length; 326 value = buffer[--expIndex]; 327 } 328 329 if (ordering.isSEAsianSwapping()) { 330 int vowel; 331 if (isThaiBaseConsonant(ch)) { 332 vowel = text.previous(); 333 if (isThaiPreVowel(vowel)) { 334 buffer = makeReorderedBuffer(vowel, value, buffer, false); 335 expIndex = buffer.length - 1; 336 value = buffer[expIndex]; 337 } else { 338 text.next(); 339 } 340 } 341 if (isLaoBaseConsonant(ch)) { 342 vowel = text.previous(); 343 if 
(isLaoPreVowel(vowel)) { 344 buffer = makeReorderedBuffer(vowel, value, buffer, false); 345 expIndex = buffer.length - 1; 346 value = buffer[expIndex]; 347 } else { 348 text.next(); 349 } 350 } 351 } 352 353 return strengthOrder(value); 354 } 355 356 /** 357 * Return the primary component of a collation element. 358 * @param order the collation element 359 * @return the element's primary component 360 */ 361 public static final int primaryOrder(int order) 362 { 363 order &= RBCollationTables.PRIMARYORDERMASK; 364 return (order >>> RBCollationTables.PRIMARYORDERSHIFT); 365 } 366 /** 367 * Return the secondary component of a collation element. 368 * @param order the collation element 369 * @return the element's secondary component 370 */ 371 public static final short secondaryOrder(int order) 372 { 373 order = order & RBCollationTables.SECONDARYORDERMASK; 374 return ((short)(order >> RBCollationTables.SECONDARYORDERSHIFT)); 375 } 376 /** 377 * Return the tertiary component of a collation element. 378 * @param order the collation element 379 * @return the element's tertiary component 380 */ 381 public static final short tertiaryOrder(int order) 382 { 383 return ((short)(order &= RBCollationTables.TERTIARYORDERMASK)); 384 } 385 386 /** 387 * Get the comparison order in the desired strength. Ignore the other 388 * differences. 389 * @param order The order value 390 */ 391 final int strengthOrder(int order) 392 { 393 int s = owner.getStrength(); 394 if (s == Collator.PRIMARY) 395 { 396 order &= RBCollationTables.PRIMARYDIFFERENCEONLY; 397 } else if (s == Collator.SECONDARY) 398 { 399 order &= RBCollationTables.SECONDARYDIFFERENCEONLY; 400 } 401 return order; 402 } 403 404 /** 405 * Sets the iterator to point to the collation element corresponding to 406 * the specified character (the parameter is a CHARACTER offset in the 407 * original string, not an offset into its corresponding sequence of 408 * collation elements). 
The value returned by the next call to next() 409 * will be the collation element corresponding to the specified position 410 * in the text. If that position is in the middle of a contracting 411 * character sequence, the result of the next call to next() is the 412 * collation element for that sequence. This means that getOffset() 413 * is not guaranteed to return the same value as was passed to a preceding 414 * call to setOffset(). 415 * 416 * @param newOffset The new character offset into the original text. 417 * @since 1.2 418 */ 419 @SuppressWarnings("deprecation") // getBeginIndex, getEndIndex and setIndex are deprecated 420 public void setOffset(int newOffset) 421 { 422 if (text != null) { 423 if (newOffset < text.getBeginIndex() 424 || newOffset >= text.getEndIndex()) { 425 text.setIndexOnly(newOffset); 426 } else { 427 int c = text.setIndex(newOffset); 428 429 // if the desired character isn't used in a contracting character 430 // sequence, bypass all the backing-up logic-- we're sitting on 431 // the right character already 432 if (ordering.usedInContractSeq(c)) { 433 // walk backwards through the string until we see a character 434 // that DOESN'T participate in a contracting character sequence 435 while (ordering.usedInContractSeq(c)) { 436 c = text.previous(); 437 } 438 // now walk forward using this object's next() method until 439 // we pass the starting point and set our current position 440 // to the beginning of the last "character" before or at 441 // our starting position 442 int last = text.getIndex(); 443 while (text.getIndex() <= newOffset) { 444 last = text.getIndex(); 445 next(); 446 } 447 text.setIndexOnly(last); 448 // we don't need this, since last is the last index 449 // that is the starting of the contraction which encompass 450 // newOffset 451 // text.previous(); 452 } 453 } 454 } 455 buffer = null; 456 expIndex = 0; 457 swapOrder = 0; 458 } 459 460 /** 461 * Returns the character offset in the original text corresponding to the 
next 462 * collation element. (That is, getOffset() returns the position in the text 463 * corresponding to the collation element that will be returned by the next 464 * call to next().) This value will always be the index of the FIRST character 465 * corresponding to the collation element (a contracting character sequence is 466 * when two or more characters all correspond to the same collation element). 467 * This means if you do setOffset(x) followed immediately by getOffset(), getOffset() 468 * won't necessarily return x. 469 * 470 * @return The character offset in the original text corresponding to the collation 471 * element that will be returned by the next call to next(). 472 * @since 1.2 473 */ 474 public int getOffset() 475 { 476 return (text != null) ? text.getIndex() : 0; 477 } 478 479 480 /** 481 * Return the maximum length of any expansion sequences that end 482 * with the specified comparison order. 483 * @param order a collation order returned by previous or next. 484 * @return the maximum length of any expansion sequences ending 485 * with the specified order. 486 * @since 1.2 487 */ 488 public int getMaxExpansion(int order) 489 { 490 return ordering.getMaxExpansion(order); 491 } 492 493 /** 494 * Set a new string over which to iterate. 495 * 496 * @param source the new source text 497 * @since 1.2 498 */ 499 public void setText(String source) 500 { 501 buffer = null; 502 swapOrder = 0; 503 expIndex = 0; 504 NormalizerBase.Mode mode = 505 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 506 if (text == null) { 507 text = new NormalizerBase(source, mode); 508 } else { 509 text.setMode(mode); 510 text.setText(source); 511 } 512 } 513 514 /** 515 * Set a new string over which to iterate. 516 * 517 * @param source the new source text. 
518 * @since 1.2 519 */ 520 public void setText(CharacterIterator source) 521 { 522 buffer = null; 523 swapOrder = 0; 524 expIndex = 0; 525 NormalizerBase.Mode mode = 526 CollatorUtilities.toNormalizerMode(owner.getDecomposition()); 527 if (text == null) { 528 text = new NormalizerBase(source, mode); 529 } else { 530 text.setMode(mode); 531 text.setText(source); 532 } 533 } 534 535 //============================================================ 536 // privates 537 //============================================================ 538 539 /** 540 * Determine if a character is a Thai vowel (which sorts after 541 * its base consonant). 542 */ 543 private static final boolean isThaiPreVowel(int ch) { 544 return (ch >= 0x0e40) && (ch <= 0x0e44); 545 } 546 547 /** 548 * Determine if a character is a Thai base consonant 549 */ 550 private static final boolean isThaiBaseConsonant(int ch) { 551 return (ch >= 0x0e01) && (ch <= 0x0e2e); 552 } 553 554 /** 555 * Determine if a character is a Lao vowel (which sorts after 556 * its base consonant). 557 */ 558 private static final boolean isLaoPreVowel(int ch) { 559 return (ch >= 0x0ec0) && (ch <= 0x0ec4); 560 } 561 562 /** 563 * Determine if a character is a Lao base consonant 564 */ 565 private static final boolean isLaoBaseConsonant(int ch) { 566 return (ch >= 0x0e81) && (ch <= 0x0eae); 567 } 568 569 /** 570 * This method produces a buffer which contains the collation 571 * elements for the two characters, with colFirst's values preceding 572 * another character's. Presumably, the other character precedes colFirst 573 * in logical order (otherwise you wouldn't need this method would you?). 574 * The assumption is that the other char's value(s) have already been 575 * computed. If this char has a single element it is passed to this 576 * method as lastValue, and lastExpansion is null. If it has an 577 * expansion it is passed in lastExpansion, and colLastValue is ignored. 
578 */ 579 private int[] makeReorderedBuffer(int colFirst, 580 int lastValue, 581 int[] lastExpansion, 582 boolean forward) { 583 584 int[] result; 585 586 int firstValue = ordering.getUnicodeOrder(colFirst); 587 if (firstValue >= RuleBasedCollator.CONTRACTCHARINDEX) { 588 firstValue = forward? nextContractChar(colFirst) : prevContractChar(colFirst); 589 } 590 591 int[] firstExpansion = null; 592 if (firstValue >= RuleBasedCollator.EXPANDCHARINDEX) { 593 firstExpansion = ordering.getExpandValueList(firstValue); 594 } 595 596 if (!forward) { 597 int temp1 = firstValue; 598 firstValue = lastValue; 599 lastValue = temp1; 600 int[] temp2 = firstExpansion; 601 firstExpansion = lastExpansion; 602 lastExpansion = temp2; 603 } 604 605 if (firstExpansion == null && lastExpansion == null) { 606 result = new int [2]; 607 result[0] = firstValue; 608 result[1] = lastValue; 609 } 610 else { 611 int firstLength = firstExpansion==null? 1 : firstExpansion.length; 612 int lastLength = lastExpansion==null? 1 : lastExpansion.length; 613 result = new int[firstLength + lastLength]; 614 615 if (firstExpansion == null) { 616 result[0] = firstValue; 617 } 618 else { 619 System.arraycopy(firstExpansion, 0, result, 0, firstLength); 620 } 621 622 if (lastExpansion == null) { 623 result[firstLength] = lastValue; 624 } 625 else { 626 System.arraycopy(lastExpansion, 0, result, firstLength, lastLength); 627 } 628 } 629 630 return result; 631 } 632 633 /** 634 * Check if a comparison order is ignorable. 635 * @return true if a character is ignorable, false otherwise. 636 */ 637 static final boolean isIgnorable(int order) 638 { 639 return ((primaryOrder(order) == 0) ? true : false); 640 } 641 642 /** 643 * Get the ordering priority of the next contracting character in the 644 * string. 645 * @param ch the starting character of a contracting character token 646 * @return the next contracting character's ordering. Returns NULLORDER 647 * if the end of string is reached. 
648 */ 649 private int nextContractChar(int ch) 650 { 651 // First get the ordering of this single character, 652 // which is always the first element in the list 653 Vector<EntryPair> list = ordering.getContractValues(ch); 654 EntryPair pair = list.firstElement(); 655 int order = pair.value; 656 657 // find out the length of the longest contracting character sequence in the list. 658 // There's logic in the builder code to make sure the longest sequence is always 659 // the last. 660 pair = list.lastElement(); 661 int maxLength = pair.entryName.length(); 662 663 // (the Normalizer is cloned here so that the seeking we do in the next loop 664 // won't affect our real position in the text) 665 NormalizerBase tempText = (NormalizerBase)text.clone(); 666 667 // extract the next maxLength characters in the string (we have to do this using the 668 // Normalizer to ensure that our offsets correspond to those the rest of the 669 // iterator is using) and store it in "fragment". 670 tempText.previous(); 671 key.setLength(0); 672 int c = tempText.next(); 673 while (maxLength > 0 && c != NormalizerBase.DONE) { 674 if (Character.isSupplementaryCodePoint(c)) { 675 key.append(Character.toChars(c)); 676 maxLength -= 2; 677 } else { 678 key.append((char)c); 679 --maxLength; 680 } 681 c = tempText.next(); 682 } 683 String fragment = key.toString(); 684 // now that we have that fragment, iterate through this list looking for the 685 // longest sequence that matches the characters in the actual text. 
(maxLength 686 // is used here to keep track of the length of the longest sequence) 687 // Upon exit from this loop, maxLength will contain the length of the matching 688 // sequence and order will contain the collation-element value corresponding 689 // to this sequence 690 maxLength = 1; 691 for (int i = list.size() - 1; i > 0; i--) { 692 pair = list.elementAt(i); 693 if (!pair.fwd) 694 continue; 695 696 if (fragment.startsWith(pair.entryName) && pair.entryName.length() 697 > maxLength) { 698 maxLength = pair.entryName.length(); 699 order = pair.value; 700 } 701 } 702 703 // seek our current iteration position to the end of the matching sequence 704 // and return the appropriate collation-element value (if there was no matching 705 // sequence, we're already seeked to the right position and order already contains 706 // the correct collation-element value for the single character) 707 while (maxLength > 1) { 708 c = text.next(); 709 maxLength -= Character.charCount(c); 710 } 711 return order; 712 } 713 714 /** 715 * Get the ordering priority of the previous contracting character in the 716 * string. 717 * @param ch the starting character of a contracting character token 718 * @return the next contracting character's ordering. Returns NULLORDER 719 * if the end of string is reached. 720 */ 721 private int prevContractChar(int ch) 722 { 723 // This function is identical to nextContractChar(), except that we've 724 // switched things so that the next() and previous() calls on the Normalizer 725 // are switched and so that we skip entry pairs with the fwd flag turned on 726 // rather than off. Notice that we still use append() and startsWith() when 727 // working on the fragment. This is because the entry pairs that are used 728 // in reverse iteration have their names reversed already. 
729 Vector<EntryPair> list = ordering.getContractValues(ch); 730 EntryPair pair = list.firstElement(); 731 int order = pair.value; 732 733 pair = list.lastElement(); 734 int maxLength = pair.entryName.length(); 735 736 NormalizerBase tempText = (NormalizerBase)text.clone(); 737 738 tempText.next(); 739 key.setLength(0); 740 int c = tempText.previous(); 741 while (maxLength > 0 && c != NormalizerBase.DONE) { 742 if (Character.isSupplementaryCodePoint(c)) { 743 key.append(Character.toChars(c)); 744 maxLength -= 2; 745 } else { 746 key.append((char)c); 747 --maxLength; 748 } 749 c = tempText.previous(); 750 } 751 String fragment = key.toString(); 752 753 maxLength = 1; 754 for (int i = list.size() - 1; i > 0; i--) { 755 pair = list.elementAt(i); 756 if (pair.fwd) 757 continue; 758 759 if (fragment.startsWith(pair.entryName) && pair.entryName.length() 760 > maxLength) { 761 maxLength = pair.entryName.length(); 762 order = pair.value; 763 } 764 } 765 766 while (maxLength > 1) { 767 c = text.previous(); 768 maxLength -= Character.charCount(c); 769 } 770 return order; 771 } 772 773 static final int UNMAPPEDCHARVALUE = 0x7FFF0000; 774 775 private NormalizerBase text = null; 776 private int[] buffer = null; 777 private int expIndex = 0; 778 private StringBuffer key = new StringBuffer(5); 779 private int swapOrder = 0; 780 private RBCollationTables ordering; 781 private RuleBasedCollator owner; 782} 783