1/* 2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 3 * 4 * This code is free software; you can redistribute it and/or modify it 5 * under the terms of the GNU General Public License version 2 only, as 6 * published by the Free Software Foundation. Oracle designates this 7 * particular file as subject to the "Classpath" exception as provided 8 * by Oracle in the LICENSE file that accompanied this code. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26/* 27 * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved 28 * 29 * This file is a modification of the ICU file IndicReordering.h 30 * by Jens Herden and Javier Sola for Khmer language 31 * 32 */ 33 34#ifndef __KHMERREORDERING_H 35#define __KHMERREORDERING_H 36 37/** 38 * \file 39 * \internal 40 */ 41 42#include "LETypes.h" 43#include "OpenTypeTables.h" 44 45U_NAMESPACE_BEGIN 46 47class LEGlyphStorage; 48 49// Vocabulary 50// Base -> A consonant or an independent vowel in its full (not subscript) form. It is the 51// center of the syllable, it can be souranded by coeng (subscript) consonants, vowels, 52// split vowels, signs... but there is only one base in a syllable, it has to be coded as 53// the first character of the syllable. 54// split vowel --> vowel that has two parts placed separately (e.g. 
before and after the consonant). 55// Khmer language has five of them. Khmer split vowels either have one part before the 56// base and one after the base, or they have a part before the base and a part above the base. 57// The first part of all Khmer split vowels is the same character, identical to 58// the glyph of Khmer dependent vowel SRA EI. 59// coeng --> modifier used in Khmer to construct coeng (subscript) consonants. 60// Unlike in Indian languages, the coeng modifies the consonant that follows it, 61// not the one preceding it. Each consonant has two forms, the base form and the subscript form: 62// the base form is the normal one (using the consonant's code point), the subscript form is 63// displayed when the combination coeng + consonant is encountered. 64// Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant 65// Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO) 66// Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA) 67// Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds 68// if it is attached to a consonant of the first series or a consonant of the second series. 69// Most consonants have an equivalent in the other series, but some of them exist only in 70// one series (for example SA). If we want to use the consonant SA with a vowel sound that 71// corresponds to a vowel accompanying a consonant 72// of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN 73// (x17C9 and x17CA). TRIISAP changes a first series consonant to a second series sound and 74// MUSIKATOAN changes a second series consonant to have a first series vowel sound. 
75// Consonant shifter are both normally supercript marks, but, when they are followed by a 76// superscript, they change shape and take the form of subscript dependent vowel SRA U. 77// If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they 78// should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should 79// be placed after the coeng consonant. 80// Dependent vowel -> In khmer dependent vowels can be placed above, below, before or after the base 81// Each vowel has its own position. Only one vowel per syllable is allowed. 82// Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are 83// Allowed in a syllable. 84// 85// 86 87struct KhmerClassTable // This list must include all types of components that can be used inside a syllable 88{ 89 enum CharClassValues // order is important here! This order must be the same that is found in each horizontal 90 // line in the statetable for Khmer (file KhmerReordering.cpp). 
91 { 92 CC_RESERVED = 0, 93 CC_CONSONANT = 1, // consonant of type 1 or independent vowel 94 CC_CONSONANT2 = 2, // Consonant of type 2 95 CC_CONSONANT3 = 3, // Consonant of type 3 96 CC_ZERO_WIDTH_NJ_MARK = 4, // Zero Width non joiner character (0x200C) 97 CC_CONSONANT_SHIFTER = 5, 98 CC_ROBAT = 6, // Khmer special diacritic accent -treated differently in state table 99 CC_COENG = 7, // Subscript consonant combining character 100 CC_DEPENDENT_VOWEL = 8, 101 CC_SIGN_ABOVE = 9, 102 CC_SIGN_AFTER = 10, 103 CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character 104 CC_COUNT = 12 // This is the number of character classes 105 }; 106 107 enum CharClassFlags 108 { 109 CF_CLASS_MASK = 0x0000FFFF, 110 111 CF_CONSONANT = 0x01000000, // flag to speed up comparing 112 CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable 113 CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable 114 CF_COENG = 0x08000000, // flag to speed up comparing 115 CF_SHIFTER = 0x10000000, // flag to speed up comparing 116 CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing 117 118 // position flags 119 CF_POS_BEFORE = 0x00080000, 120 CF_POS_BELOW = 0x00040000, 121 CF_POS_ABOVE = 0x00020000, 122 CF_POS_AFTER = 0x00010000, 123 CF_POS_MASK = 0x000f0000 124 }; 125 126 typedef le_uint32 CharClass; 127 128 typedef le_int32 ScriptFlags; 129 130 LEUnicode firstChar; // for Khmer this will become x1780 131 LEUnicode lastChar; // and this x17DF 132 const CharClass *classTable; 133 134 CharClass getCharClass(LEUnicode ch) const; 135 136 static const KhmerClassTable *getKhmerClassTable(); 137}; 138 139 140class KhmerReordering /* not : public UObject because all methods are static */ { 141public: 142 static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, 143 LEUnicode *outChars, LEGlyphStorage &glyphStorage); 144 145 static const FeatureMap 
*getFeatureMap(le_int32 &count); 146 147private: 148 // do not instantiate 149 KhmerReordering(); 150 151 static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); 152 153}; 154 155 156U_NAMESPACE_END 157#endif 158