1/*************************************************
2*          Unicode Property Table handler        *
3*************************************************/
4
5#ifndef _UCP_H
6#define _UCP_H
7
8/* This file contains definitions of the property values that are returned by
9the UCD access macros. New values that are added for new releases of Unicode
10should always be at the end of each enum, for backwards compatibility.
11
12IMPORTANT: Note also that the specific numeric values of the enums have to be
13the same as the values that are generated by the maint/MultiStage2.py script,
14where the equivalent property descriptive names are listed in vectors.
15
16ALSO: The specific values of the first two enums are assumed for the table
17called catposstab in pcre_compile.c. */
18
/* General character categories. Each enumerator is spelled out with its
numeric value because these values are relied upon outside this header (see
the notes at the top of the file), so they must never be renumbered. */

enum {
  ucp_C = 0,   /* Other */
  ucp_L = 1,   /* Letter */
  ucp_M = 2,   /* Mark */
  ucp_N = 3,   /* Number */
  ucp_P = 4,   /* Punctuation */
  ucp_S = 5,   /* Symbol */
  ucp_Z = 6    /* Separator */
};
30
/* Particular character categories, grouped here by their leading letter.
The numeric values are written out explicitly; they run consecutively from
0 and must stay exactly as they are (see the notes at the top of the file). */

enum {
  /* C: Other */
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */

  /* L: Letter */
  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */

  /* M: Mark */
  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */

  /* N: Number */
  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */

  /* P: Punctuation */
  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */

  /* S: Symbol */
  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */

  /* Z: Separator */
  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};
65
/* Grapheme break properties. The code that processes these values assumes
that every one of them is less than 16; adding enough new entries to reach 16
or more means that code will have to be rewritten. */

enum {
  ucp_gbCR                = 0,
  ucp_gbLF                = 1,
  ucp_gbControl           = 2,
  ucp_gbExtend            = 3,
  ucp_gbPrepend           = 4,
  ucp_gbSpacingMark       = 5,
  ucp_gbL                 = 6,   /* Hangul syllable type L */
  ucp_gbV                 = 7,   /* Hangul syllable type V */
  ucp_gbT                 = 8,   /* Hangul syllable type T */
  ucp_gbLV                = 9,   /* Hangul syllable type LV */
  ucp_gbLVT               = 10,  /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther             = 12
};
85
/* Script identifications. The first group is in alphabetical order; each
later group holds the scripts added for one Unicode release and is appended
after the previous one, so existing values never change. The values are
written out explicitly and run consecutively from 0. */

enum {
  ucp_Arabic                 = 0,
  ucp_Armenian               = 1,
  ucp_Bengali                = 2,
  ucp_Bopomofo               = 3,
  ucp_Braille                = 4,
  ucp_Buginese               = 5,
  ucp_Buhid                  = 6,
  ucp_Canadian_Aboriginal    = 7,
  ucp_Cherokee               = 8,
  ucp_Common                 = 9,
  ucp_Coptic                 = 10,
  ucp_Cypriot                = 11,
  ucp_Cyrillic               = 12,
  ucp_Deseret                = 13,
  ucp_Devanagari             = 14,
  ucp_Ethiopic               = 15,
  ucp_Georgian               = 16,
  ucp_Glagolitic             = 17,
  ucp_Gothic                 = 18,
  ucp_Greek                  = 19,
  ucp_Gujarati               = 20,
  ucp_Gurmukhi               = 21,
  ucp_Han                    = 22,
  ucp_Hangul                 = 23,
  ucp_Hanunoo                = 24,
  ucp_Hebrew                 = 25,
  ucp_Hiragana               = 26,
  ucp_Inherited              = 27,
  ucp_Kannada                = 28,
  ucp_Katakana               = 29,
  ucp_Kharoshthi             = 30,
  ucp_Khmer                  = 31,
  ucp_Lao                    = 32,
  ucp_Latin                  = 33,
  ucp_Limbu                  = 34,
  ucp_Linear_B               = 35,
  ucp_Malayalam              = 36,
  ucp_Mongolian              = 37,
  ucp_Myanmar                = 38,
  ucp_New_Tai_Lue            = 39,
  ucp_Ogham                  = 40,
  ucp_Old_Italic             = 41,
  ucp_Old_Persian            = 42,
  ucp_Oriya                  = 43,
  ucp_Osmanya                = 44,
  ucp_Runic                  = 45,
  ucp_Shavian                = 46,
  ucp_Sinhala                = 47,
  ucp_Syloti_Nagri           = 48,
  ucp_Syriac                 = 49,
  ucp_Tagalog                = 50,
  ucp_Tagbanwa               = 51,
  ucp_Tai_Le                 = 52,
  ucp_Tamil                  = 53,
  ucp_Telugu                 = 54,
  ucp_Thaana                 = 55,
  ucp_Thai                   = 56,
  ucp_Tibetan                = 57,
  ucp_Tifinagh               = 58,
  ucp_Ugaritic               = 59,
  ucp_Yi                     = 60,

  /* Added for Unicode 5.0 */
  ucp_Balinese               = 61,
  ucp_Cuneiform              = 62,
  ucp_Nko                    = 63,
  ucp_Phags_Pa               = 64,
  ucp_Phoenician             = 65,

  /* Added for Unicode 5.1 */
  ucp_Carian                 = 66,
  ucp_Cham                   = 67,
  ucp_Kayah_Li               = 68,
  ucp_Lepcha                 = 69,
  ucp_Lycian                 = 70,
  ucp_Lydian                 = 71,
  ucp_Ol_Chiki               = 72,
  ucp_Rejang                 = 73,
  ucp_Saurashtra             = 74,
  ucp_Sundanese              = 75,
  ucp_Vai                    = 76,

  /* Added for Unicode 5.2 */
  ucp_Avestan                = 77,
  ucp_Bamum                  = 78,
  ucp_Egyptian_Hieroglyphs   = 79,
  ucp_Imperial_Aramaic       = 80,
  ucp_Inscriptional_Pahlavi  = 81,
  ucp_Inscriptional_Parthian = 82,
  ucp_Javanese               = 83,
  ucp_Kaithi                 = 84,
  ucp_Lisu                   = 85,
  ucp_Meetei_Mayek           = 86,
  ucp_Old_South_Arabian      = 87,
  ucp_Old_Turkic             = 88,
  ucp_Samaritan              = 89,
  ucp_Tai_Tham               = 90,
  ucp_Tai_Viet               = 91,

  /* Added for Unicode 6.0.0 */
  ucp_Batak                  = 92,
  ucp_Brahmi                 = 93,
  ucp_Mandaic                = 94,

  /* Added for Unicode 6.1.0 */
  ucp_Chakma                 = 95,
  ucp_Meroitic_Cursive       = 96,
  ucp_Meroitic_Hieroglyphs   = 97,
  ucp_Miao                   = 98,
  ucp_Sharada                = 99,
  ucp_Sora_Sompeng           = 100,
  ucp_Takri                  = 101,

  /* Added for Unicode 7.0.0 */
  ucp_Bassa_Vah              = 102,
  ucp_Caucasian_Albanian     = 103,
  ucp_Duployan               = 104,
  ucp_Elbasan                = 105,
  ucp_Grantha                = 106,
  ucp_Khojki                 = 107,
  ucp_Khudawadi              = 108,
  ucp_Linear_A               = 109,
  ucp_Mahajani               = 110,
  ucp_Manichaean             = 111,
  ucp_Mende_Kikakui          = 112,
  ucp_Modi                   = 113,
  ucp_Mro                    = 114,
  ucp_Nabataean              = 115,
  ucp_Old_North_Arabian      = 116,
  ucp_Old_Permic             = 117,
  ucp_Pahawh_Hmong           = 118,
  ucp_Palmyrene              = 119,
  ucp_Psalter_Pahlavi        = 120,
  ucp_Pau_Cin_Hau            = 121,
  ucp_Siddham                = 122,
  ucp_Tirhuta                = 123,
  ucp_Warang_Citi            = 124
};
221
222#endif
223
224/* End of ucp.h */
225