1/* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. Oracle designates this 7 * particular file as subject to the "Classpath" exception as provided 8 * by Oracle in the LICENSE file that accompanied this code. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26/* 27 * 28 * (C) Copyright IBM Corp. 1998-2007 - All Rights Reserved 29 * 30 * This file is a modification of the ICU file IndicReordering.cpp 31 * by Jens Herden and Javier Sola for Khmer language 32 * 33 */ 34 35#include "LETypes.h" 36#include "OpenTypeTables.h" 37#include "KhmerReordering.h" 38#include "LEGlyphStorage.h" 39 40 41U_NAMESPACE_BEGIN 42 43// Characters that get referred to by name... 
44enum 45{ 46 C_SIGN_ZWNJ = 0x200C, 47 C_SIGN_ZWJ = 0x200D, 48 C_DOTTED_CIRCLE = 0x25CC, 49 C_RO = 0x179A, 50 C_VOWEL_AA = 0x17B6, 51 C_SIGN_NIKAHIT = 0x17C6, 52 C_VOWEL_E = 0x17C1, 53 C_COENG = 0x17D2 54}; 55 56 57enum 58{ 59 // simple classes, they are used in the statetable (in this file) to control the length of a syllable 60 // they are also used to know where a character should be placed (location in reference to the base character) 61 // and also to know if a character, when independtly displayed, should be displayed with a dotted-circle to 62 // indicate error in syllable construction 63 _xx = KhmerClassTable::CC_RESERVED, 64 _sa = KhmerClassTable::CC_SIGN_ABOVE | KhmerClassTable::CF_DOTTED_CIRCLE | KhmerClassTable::CF_POS_ABOVE, 65 _sp = KhmerClassTable::CC_SIGN_AFTER | KhmerClassTable::CF_DOTTED_CIRCLE| KhmerClassTable::CF_POS_AFTER, 66 _c1 = KhmerClassTable::CC_CONSONANT | KhmerClassTable::CF_CONSONANT, 67 _c2 = KhmerClassTable::CC_CONSONANT2 | KhmerClassTable::CF_CONSONANT, 68 _c3 = KhmerClassTable::CC_CONSONANT3 | KhmerClassTable::CF_CONSONANT, 69 _rb = KhmerClassTable::CC_ROBAT | KhmerClassTable::CF_POS_ABOVE | KhmerClassTable::CF_DOTTED_CIRCLE, 70 _cs = KhmerClassTable::CC_CONSONANT_SHIFTER | KhmerClassTable::CF_DOTTED_CIRCLE | KhmerClassTable::CF_SHIFTER, 71 _dl = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_BEFORE | KhmerClassTable::CF_DOTTED_CIRCLE, 72 _db = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_BELOW | KhmerClassTable::CF_DOTTED_CIRCLE, 73 _da = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_ABOVE | KhmerClassTable::CF_DOTTED_CIRCLE | KhmerClassTable::CF_ABOVE_VOWEL, 74 _dr = KhmerClassTable::CC_DEPENDENT_VOWEL | KhmerClassTable::CF_POS_AFTER | KhmerClassTable::CF_DOTTED_CIRCLE, 75 _co = KhmerClassTable::CC_COENG | KhmerClassTable::CF_COENG | KhmerClassTable::CF_DOTTED_CIRCLE, 76 77 // split vowel 78 _va = _da | KhmerClassTable::CF_SPLIT_VOWEL, 79 _vr = _dr | KhmerClassTable::CF_SPLIT_VOWEL 
80}; 81 82 83// Character class tables 84// _xx character does not combine into syllable, such as numbers, puntuation marks, non-Khmer signs... 85// _sa Sign placed above the base 86// _sp Sign placed after the base 87// _c1 Consonant of type 1 or independent vowel (independent vowels behave as type 1 consonants) 88// _c2 Consonant of type 2 (only RO) 89// _c3 Consonant of type 3 90// _rb Khmer sign robat u17CC. combining mark for subscript consonants 91// _cd Consonant-shifter 92// _dl Dependent vowel placed before the base (left of the base) 93// _db Dependent vowel placed below the base 94// _da Dependent vowel placed above the base 95// _dr Dependent vowel placed behind the base (right of the base) 96// _co Khmer combining mark COENG u17D2, combines with the consonant or independent vowel following 97// it to create a subscript consonant or independent vowel 98// _va Khmer split vowel in wich the first part is before the base and the second one above the base 99// _vr Khmer split vowel in wich the first part is before the base and the second one behind (right of) the base 100 101static const KhmerClassTable::CharClass khmerCharClasses[] = 102{ 103 _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c1, _c1, // 1780 - 178F 104 _c1, _c1, _c1, _c1, _c3, _c1, _c1, _c1, _c1, _c3, _c2, _c1, _c1, _c1, _c3, _c3, // 1790 - 179F 105 _c1, _c3, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, _c1, // 17A0 - 17AF 106 _c1, _c1, _c1, _c1, _dr, _dr, _dr, _da, _da, _da, _da, _db, _db, _db, _va, _vr, // 17B0 - 17BF 107 _vr, _dl, _dl, _dl, _vr, _vr, _sa, _sp, _sp, _cs, _cs, _sa, _rb, _sa, _sa, _sa, // 17C0 - 17CF 108 _sa, _sa, _co, _sa, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _sa, _xx, _xx, // 17D0 - 17DF 109}; 110 111 112// 113// Khmer Class Tables 114// 115 116// 117// The range of characters defined in the above table is defined here. 
FOr Khmer 1780 to 17DF 118// Even if the Khmer range is bigger, all other characters are not combinable, and therefore treated 119// as _xx 120static const KhmerClassTable khmerClassTable = {0x1780, 0x17df, khmerCharClasses}; 121 122 123// Below we define how a character in the input string is either in the khmerCharClasses table 124// (in which case we get its type back), a ZWJ or ZWNJ (two characters that may appear 125// within the syllable, but are not in the table) we also get their type back, or an unknown object 126// in which case we get _xx (CC_RESERVED) back 127KhmerClassTable::CharClass KhmerClassTable::getCharClass(LEUnicode ch) const 128{ 129 130 if (ch == C_SIGN_ZWJ) { 131 return CC_ZERO_WIDTH_J_MARK; 132 } 133 134 if (ch == C_SIGN_ZWNJ) { 135 return CC_ZERO_WIDTH_NJ_MARK; 136 } 137 138 if (ch < firstChar || ch > lastChar) { 139 return CC_RESERVED; 140 } 141 142 return classTable[ch - firstChar]; 143} 144 145const KhmerClassTable *KhmerClassTable::getKhmerClassTable() 146{ 147 return &khmerClassTable; 148} 149 150 151 152class KhmerReorderingOutput : public UMemory { 153private: 154 le_int32 fSyllableCount; 155 le_int32 fOutIndex; 156 LEUnicode *fOutChars; 157 158 LEGlyphStorage &fGlyphStorage; 159 160 161public: 162 KhmerReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage) 163 : fSyllableCount(0), fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage) 164 { 165 // nothing else to do... 166 } 167 168 ~KhmerReorderingOutput() 169 { 170 // nothing to do here... 
171 } 172 173 void reset() 174 { 175 fSyllableCount += 1; 176 } 177 178 void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask charFeatures) 179 { 180 LEErrorCode success = LE_NO_ERROR; 181 182 fOutChars[fOutIndex] = ch; 183 184 fGlyphStorage.setCharIndex(fOutIndex, charIndex, success); 185 fGlyphStorage.setAuxData(fOutIndex, charFeatures | (fSyllableCount & LE_GLYPH_GROUP_MASK), success); 186 187 fOutIndex += 1; 188 } 189 190 le_int32 getOutputIndex() 191 { 192 return fOutIndex; 193 } 194}; 195 196 197#define blwfFeatureTag LE_BLWF_FEATURE_TAG 198#define pstfFeatureTag LE_PSTF_FEATURE_TAG 199#define presFeatureTag LE_PRES_FEATURE_TAG 200#define blwsFeatureTag LE_BLWS_FEATURE_TAG 201#define abvsFeatureTag LE_ABVS_FEATURE_TAG 202#define pstsFeatureTag LE_PSTS_FEATURE_TAG 203 204#define blwmFeatureTag LE_BLWM_FEATURE_TAG 205#define abvmFeatureTag LE_ABVM_FEATURE_TAG 206#define distFeatureTag LE_DIST_FEATURE_TAG 207 208#define prefFeatureTag LE_PREF_FEATURE_TAG 209#define abvfFeatureTag LE_ABVF_FEATURE_TAG 210#define cligFeatureTag LE_CLIG_FEATURE_TAG 211#define mkmkFeatureTag LE_MKMK_FEATURE_TAG 212 213#define prefFeatureMask 0x80000000UL 214#define blwfFeatureMask 0x40000000UL 215#define abvfFeatureMask 0x20000000UL 216#define pstfFeatureMask 0x10000000UL 217#define presFeatureMask 0x08000000UL 218#define blwsFeatureMask 0x04000000UL 219#define abvsFeatureMask 0x02000000UL 220#define pstsFeatureMask 0x01000000UL 221#define cligFeatureMask 0x00800000UL 222#define distFeatureMask 0x00400000UL 223#define blwmFeatureMask 0x00200000UL 224#define abvmFeatureMask 0x00100000UL 225#define mkmkFeatureMask 0x00080000UL 226 227#define tagPref (prefFeatureMask | presFeatureMask | cligFeatureMask | distFeatureMask) 228#define tagAbvf (abvfFeatureMask | abvsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | mkmkFeatureMask) 229#define tagPstf (blwfFeatureMask | blwsFeatureMask | prefFeatureMask | presFeatureMask | pstfFeatureMask | pstsFeatureMask | 
cligFeatureMask | distFeatureMask | blwmFeatureMask) 230#define tagBlwf (blwfFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask | blwmFeatureMask | mkmkFeatureMask) 231#define tagDefault (prefFeatureMask | blwfFeatureMask | presFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask | mkmkFeatureMask) 232 233 234 235// These are in the order in which the features need to be applied 236// for correct processing 237static const FeatureMap featureMap[] = 238{ 239 // Shaping features 240 {prefFeatureTag, prefFeatureMask}, 241 {blwfFeatureTag, blwfFeatureMask}, 242 {abvfFeatureTag, abvfFeatureMask}, 243 {pstfFeatureTag, pstfFeatureMask}, 244 {presFeatureTag, presFeatureMask}, 245 {blwsFeatureTag, blwsFeatureMask}, 246 {abvsFeatureTag, abvsFeatureMask}, 247 {pstsFeatureTag, pstsFeatureMask}, 248 {cligFeatureTag, cligFeatureMask}, 249 250 // Positioning features 251 {distFeatureTag, distFeatureMask}, 252 {blwmFeatureTag, blwmFeatureMask}, 253 {abvmFeatureTag, abvmFeatureMask}, 254 {mkmkFeatureTag, mkmkFeatureMask}, 255}; 256 257static const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap); 258 259// The stateTable is used to calculate the end (the length) of a well 260// formed Khmer Syllable. 261// 262// Each horizontal line is ordered exactly the same way as the values in KhmerClassTable 263// CharClassValues in KhmerReordering.h This coincidence of values allows the 264// follow up of the table. 265// 266// Each line corresponds to a state, which does not necessarily need to be a type 267// of component... for example, state 2 is a base, with is always a first character 268// in the syllable, but the state could be produced a consonant of any type when 269// it is the first character that is analysed (in ground state). 
//
// Differentiating 3 types of consonants is necessary in order to
// forbid the use of certain combinations, such as having a second
// coeng after a coeng RO.
// The nonexistent possibility of having a type 3 after another type 3 is permitted;
// eliminating it would very much complicate the table, and it does not create typing
// problems, unlike the case above.
//
// The table is quite complex, in order to limit the number of coeng consonants
// to 2 (by means of the table).
//
// There is a peculiarity, as far as Unicode is concerned:
// - The consonant-shifter is considered in two possible different
//   locations, the one considered in Unicode 3.0 and the one considered in
//   Unicode 4.0 (there is a backwards compatibility problem in this standard).


// xx    independent character, such as a number, punctuation sign or non-Khmer char
//
// c1    Khmer consonant of type 1 or an independent vowel,
//       that is, a letter in which the subscript form is only under the
//       base, not taking any space to the right or to the left
//
// c2    Khmer consonant of type 2, the coeng form takes space under
//       and to the left of the base (only RO is of this type)
//
// c3    Khmer consonant of type 3. Its subscript form takes space under
//       and to the right of the base.
//
// cs    Khmer consonant shifter
//
// rb    Khmer robat
//
// co    coeng character (u17D2)
//
// dv    dependent vowel (including split vowels, they are treated in the same way).
306// even if dv is not defined above, the component that is really tested for is 307// KhmerClassTable::CC_DEPENDENT_VOWEL, which is common to all dependent vowels 308// 309// zwj Zero Width joiner 310// 311// zwnj Zero width non joiner 312// 313// sa above sign 314// 315// sp post sign 316// 317// there are lines with equal content but for an easier understanding 318// (and maybe change in the future) we did not join them 319// 320static const le_int8 khmerStateTable[][KhmerClassTable::CC_COUNT] = 321{ 322 323// xx c1 c2 c3 zwnj cs rb co dv sa sp zwj 324 { 1, 2, 2, 2, 1, 1, 1, 6, 1, 1, 1, 2}, // 0 - ground state 325 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}, // 1 - exit state (or sign to the right of the syllable) 326 {-1, -1, -1, -1, 3, 4, 5, 6, 16, 17, 1, -1}, // 2 - Base consonant 327 {-1, -1, -1, -1, -1, 4, -1, -1, 16, -1, -1, -1}, // 3 - First ZWNJ before a register shifter 328 // It can only be followed by a shifter or a vowel 329 {-1, -1, -1, -1, 15, -1, -1, 6, 16, 17, 1, 14}, // 4 - First register shifter 330 {-1, -1, -1, -1, -1, -1, -1, -1, 20, -1, 1, -1}, // 5 - Robat 331 {-1, 7, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1}, // 6 - First Coeng 332 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 7 - First consonant of type 1 after coeng 333 {-1, -1, -1, -1, 12, 13, -1, -1, 16, 17, 1, 14}, // 8 - First consonant of type 2 after coeng 334 {-1, -1, -1, -1, 12, 13, -1, 10, 16, 17, 1, 14}, // 9 - First consonant or type 3 after ceong 335 {-1, 11, 11, 11, -1, -1, -1, -1, -1, -1, -1, -1}, // 10 - Second Coeng (no register shifter before) 336 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 11 - Second coeng consonant (or ind. 
vowel) no register shifter before 337 {-1, -1, -1, -1, -1, 13, -1, -1, 16, -1, -1, -1}, // 12 - Second ZWNJ before a register shifter 338 {-1, -1, -1, -1, 15, -1, -1, -1, 16, 17, 1, 14}, // 13 - Second register shifter 339 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 14 - ZWJ before vowel 340 {-1, -1, -1, -1, -1, -1, -1, -1, 16, -1, -1, -1}, // 15 - ZWNJ before vowel 341 {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 1, 18}, // 16 - dependent vowel 342 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 18}, // 17 - sign above 343 {-1, -1, -1, -1, -1, -1, -1, 19, -1, -1, -1, -1}, // 18 - ZWJ after vowel 344 {-1, 1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1}, // 19 - Third coeng 345 {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1}, // 20 - dependent vowel after a Robat 346 347}; 348 349 350const FeatureMap *KhmerReordering::getFeatureMap(le_int32 &count) 351{ 352 count = featureMapCount; 353 354 return featureMap; 355} 356 357 358// Given an input string of characters and a location in which to start looking 359// calculate, using the state table, which one is the last character of the syllable 360// that starts in the starting position. 
361le_int32 KhmerReordering::findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount) 362{ 363 le_int32 cursor = prev; 364 le_int8 state = 0; 365 366 while (cursor < charCount) { 367 KhmerClassTable::CharClass charClass = (classTable->getCharClass(chars[cursor]) & KhmerClassTable::CF_CLASS_MASK); 368 369 state = khmerStateTable[state][charClass]; 370 371 if (state < 0) { 372 break; 373 } 374 375 cursor += 1; 376 } 377 378 return cursor; 379} 380 381 382// This is the real reordering function as applied to the Khmer language 383 384le_int32 KhmerReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32 /*scriptCode*/, 385 LEUnicode *outChars, LEGlyphStorage &glyphStorage) 386{ 387 const KhmerClassTable *classTable = KhmerClassTable::getKhmerClassTable(); 388 389 KhmerReorderingOutput output(outChars, glyphStorage); 390 KhmerClassTable::CharClass charClass; 391 le_int32 i, prev = 0, coengRo; 392 393 394 // This loop only exits when we reach the end of a run, which may contain 395 // several syllables. 396 while (prev < charCount) { 397 le_int32 syllable = findSyllable(classTable, chars, prev, charCount); 398 399 output.reset(); 400 401 // write a pre vowel or the pre part of a split vowel first 402 // and look out for coeng + ro. RO is the only vowel of type 2, and 403 // therefore the only one that requires saving space before the base. 404 coengRo = -1; // There is no Coeng Ro, if found this value will change 405 for (i = prev; i < syllable; i += 1) { 406 charClass = classTable->getCharClass(chars[i]); 407 408 // if a split vowel, write the pre part. 
In Khmer the pre part 409 // is the same for all split vowels, same glyph as pre vowel C_VOWEL_E 410 if (charClass & KhmerClassTable::CF_SPLIT_VOWEL) { 411 output.writeChar(C_VOWEL_E, i, tagPref); 412 break; // there can be only one vowel 413 } 414 415 // if a vowel with pos before write it out 416 if (charClass & KhmerClassTable::CF_POS_BEFORE) { 417 output.writeChar(chars[i], i, tagPref); 418 break; // there can be only one vowel 419 } 420 421 // look for coeng + ro and remember position 422 // works because coeng + ro is always in front of a vowel (if there is a vowel) 423 // and because CC_CONSONANT2 is enough to identify it, as it is the only consonant 424 // with this flag 425 if ( (charClass & KhmerClassTable::CF_COENG) && (i + 1 < syllable) && 426 ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK) == KhmerClassTable::CC_CONSONANT2) ) 427 { 428 coengRo = i; 429 } 430 } 431 432 // write coeng + ro if found 433 if (coengRo > -1) { 434 output.writeChar(C_COENG, coengRo, tagPref); 435 output.writeChar(C_RO, coengRo + 1, tagPref); 436 } 437 438 // shall we add a dotted circle? 
439 // If in the position in which the base should be (first char in the string) there is 440 // a character that has the Dotted circle flag (a character that cannot be a base) 441 // then write a dotted circle 442 if (classTable->getCharClass(chars[prev]) & KhmerClassTable::CF_DOTTED_CIRCLE) { 443 output.writeChar(C_DOTTED_CIRCLE, prev, tagDefault); 444 } 445 446 // copy what is left to the output, skipping before vowels and coeng Ro if they are present 447 for (i = prev; i < syllable; i += 1) { 448 charClass = classTable->getCharClass(chars[i]); 449 450 // skip a before vowel, it was already processed 451 if (charClass & KhmerClassTable::CF_POS_BEFORE) { 452 continue; 453 } 454 455 // skip coeng + ro, it was already processed 456 if (i == coengRo) { 457 i += 1; 458 continue; 459 } 460 461 switch (charClass & KhmerClassTable::CF_POS_MASK) { 462 case KhmerClassTable::CF_POS_ABOVE : 463 output.writeChar(chars[i], i, tagAbvf); 464 break; 465 466 case KhmerClassTable::CF_POS_AFTER : 467 output.writeChar(chars[i], i, tagPstf); 468 break; 469 470 case KhmerClassTable::CF_POS_BELOW : 471 output.writeChar(chars[i], i, tagBlwf); 472 break; 473 474 default: 475 // assign the correct flags to a coeng consonant 476 // Consonants of type 3 are taged as Post forms and those type 1 as below forms 477 if ( (charClass & KhmerClassTable::CF_COENG) && i + 1 < syllable ) { 478 if ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK) 479 == KhmerClassTable::CC_CONSONANT3) { 480 output.writeChar(chars[i], i, tagPstf); 481 i += 1; 482 output.writeChar(chars[i], i, tagPstf); 483 } 484 else { 485 output.writeChar(chars[i], i, tagBlwf); 486 i += 1; 487 output.writeChar(chars[i], i, tagBlwf); 488 } 489 break; 490 } 491 // if a shifter is followed by an above vowel change the shifter to below form, 492 // an above vowel can have two possible positions i + 1 or i + 3 493 // (position i+1 corresponds to unicode 3, position i+3 to Unicode 4) 494 // and there is an extra 
rule for C_VOWEL_AA + C_SIGN_NIKAHIT also for two 495 // different positions, right after the shifter or after a vowel (Unicode 4) 496 if ( (charClass & KhmerClassTable::CF_SHIFTER) && (i + 1 < syllable) ) { 497 if ((classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_ABOVE_VOWEL) 498 || (i + 2 < syllable 499 && ( (classTable->getCharClass(chars[i + 1]) & KhmerClassTable::CF_CLASS_MASK) == C_VOWEL_AA) 500 && ( (classTable->getCharClass(chars[i + 2]) & KhmerClassTable::CF_CLASS_MASK) == C_SIGN_NIKAHIT)) 501 || (i + 3 < syllable && (classTable->getCharClass(chars[i + 3]) & KhmerClassTable::CF_ABOVE_VOWEL)) 502 || (i + 4 < syllable 503 && ( (classTable->getCharClass(chars[i + 3]) & KhmerClassTable::CF_CLASS_MASK) == C_VOWEL_AA) 504 && ( (classTable->getCharClass(chars[i + 4]) & KhmerClassTable::CF_CLASS_MASK) == C_SIGN_NIKAHIT) ) ) 505 { 506 output.writeChar(chars[i], i, tagBlwf); 507 break; 508 } 509 510 } 511 // default - any other characters 512 output.writeChar(chars[i], i, tagDefault); 513 break; 514 } // switch 515 } // for 516 517 prev = syllable; // move the pointer to the start of next syllable 518 } 519 520 return output.getOutputIndex(); 521} 522 523 524U_NAMESPACE_END 525