1/*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
3 *
4 * This code is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 only, as
6 * published by the Free Software Foundation.  Oracle designates this
7 * particular file as subject to the "Classpath" exception as provided
8 * by Oracle in the LICENSE file that accompanied this code.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26/*
27 * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved
28 *
29 * This file is a modification of the ICU file IndicReordering.h
30 * by Jens Herden and Javier Sola for Khmer language
31 *
32 */
33
34#ifndef __KHMERREORDERING_H
35#define __KHMERREORDERING_H
36
37/**
38 * \file
39 * \internal
40 */
41
42#include "LETypes.h"
43#include "OpenTypeTables.h"
44
45U_NAMESPACE_BEGIN
46
47class LEGlyphStorage;
48
49// Vocabulary
//     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the
//                     center of the syllable; it can be surrounded by coeng (subscript) consonants, vowels,
//                     split vowels, signs... but there is only one base in a syllable, and it has to be coded as
//                     the first character of the syllable.
//     split vowel --> A vowel that has two parts placed separately (e.g. before and after the consonant).
//                     The Khmer language has five of them. Khmer split vowels either have one part before the
//                     base and one after the base, or they have a part before the base and a part above the base.
//                     The first part of all Khmer split vowels is the same character, identical to
//                     the glyph of Khmer dependent vowel SRA EI.
//     coeng -->  Modifier used in Khmer to construct coeng (subscript) consonants.
//                Unlike in Indian languages, the coeng modifies the consonant that follows it,
//                not the one preceding it. Each consonant has two forms, the base form and the subscript form:
//                the base form is the normal one (using the consonant's code point); the subscript form is
//                displayed when the combination coeng + consonant is encountered.
//     Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
//     Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO)
//     Consonant of type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
//     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
//                          depending on whether it is attached to a consonant of the first series or of the
//                          second series. Most consonants have an equivalent in the other series, but some of
//                          them exist only in one series (for example SA). If we want to use such a consonant
//                          (e.g. SA) with a vowel sound that normally accompanies a consonant of the other
//                          series, then we need to use a consonant shifter: MUSIKATOAN (x17C9) or
//                          TRIISAP (x17CA). TRIISAP changes a first series consonant to a second series sound,
//                          and MUSIKATOAN changes a second series consonant to have a first series vowel sound.
//                          Consonant shifters are both normally superscript marks, but when they are followed
//                          by a superscript, they change shape and take the form of the subscript dependent
//                          vowel SRA U. If they are in the same syllable as a coeng consonant, Unicode 3.0 says
//                          that they should be typed before the coeng. Unicode 4.0 breaks with that rule and
//                          says that they should be placed after the coeng consonant.
//     Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
//                          Each vowel has its own position. Only one vowel per syllable is allowed.
//     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign is
//                          allowed in a syllable.
84//
85//
86
87struct KhmerClassTable    // This list must include all types of components that can be used inside a syllable
88{
89    enum CharClassValues  // order is important here! This order must be the same that is found in each horizontal
90                          // line in the statetable for Khmer (file KhmerReordering.cpp).
91    {
92        CC_RESERVED             =  0,
93        CC_CONSONANT            =  1, // consonant of type 1 or independent vowel
94        CC_CONSONANT2           =  2, // Consonant of type 2
95        CC_CONSONANT3           =  3, // Consonant of type 3
96        CC_ZERO_WIDTH_NJ_MARK   =  4, // Zero Width non joiner character (0x200C)
97        CC_CONSONANT_SHIFTER    =  5,
98        CC_ROBAT                =  6, // Khmer special diacritic accent -treated differently in state table
99        CC_COENG                =  7, // Subscript consonant combining character
100        CC_DEPENDENT_VOWEL      =  8,
101        CC_SIGN_ABOVE           =  9,
102        CC_SIGN_AFTER           = 10,
103        CC_ZERO_WIDTH_J_MARK    = 11, // Zero width joiner character
104        CC_COUNT                = 12  // This is the number of character classes
105    };
106
107    enum CharClassFlags
108    {
109        CF_CLASS_MASK    = 0x0000FFFF,
110
111        CF_CONSONANT     = 0x01000000,  // flag to speed up comparing
112        CF_SPLIT_VOWEL   = 0x02000000,  // flag for a split vowel -> the first part is added in front of the syllable
113        CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
114        CF_COENG         = 0x08000000,  // flag to speed up comparing
115        CF_SHIFTER       = 0x10000000,  // flag to speed up comparing
116        CF_ABOVE_VOWEL   = 0x20000000,  // flag to speed up comparing
117
118        // position flags
119        CF_POS_BEFORE    = 0x00080000,
120        CF_POS_BELOW     = 0x00040000,
121        CF_POS_ABOVE     = 0x00020000,
122        CF_POS_AFTER     = 0x00010000,
123        CF_POS_MASK      = 0x000f0000
124    };
125
126    typedef le_uint32 CharClass;
127
128    typedef le_int32 ScriptFlags;
129
130    LEUnicode firstChar;   // for Khmer this will become x1780
131    LEUnicode lastChar;    //  and this x17DF
132    const CharClass *classTable;
133
134    CharClass getCharClass(LEUnicode ch) const;
135
136    static const KhmerClassTable *getKhmerClassTable();
137};
138
139
140class KhmerReordering /* not : public UObject because all methods are static */ {
141public:
142    static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
143        LEUnicode *outChars, LEGlyphStorage &glyphStorage);
144
145    static const FeatureMap *getFeatureMap(le_int32 &count);
146
147private:
148    // do not instantiate
149    KhmerReordering();
150
151    static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);
152
153};
154
155
156U_NAMESPACE_END
157#endif
158