1/*
2 * regc_locale.c --
3 *
4 *	This file contains the Unicode locale specific regexp routines.
5 *	This file is #included by regcomp.c.
6 *
7 * Copyright (c) 1998 by Scriptics Corporation.
8 *
9 * See the file "license.terms" for information on usage and redistribution
10 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 *
12 * RCS: @(#) $Id: regc_locale.c,v 1.10 2002/07/29 12:27:51 dkf Exp $
13 */
14
15/* ASCII character-name table */
16
/*
 * Name -> character table searched linearly by element() when a
 * collating-element name is longer than one chr.  Several characters have
 * more than one name (e.g. "BEL"/"alert", "period"/"full-stop").  The list
 * ends with a {NULL, 0} sentinel; element() stops at the first entry whose
 * name is NULL, so the sentinel must stay last.
 */
static struct cname {
    char *name;		/* symbolic name, NUL-terminated */
    char code;		/* the character the name stands for */
} cnames[] = {
    {"NUL",		'\0'},
    {"SOH",		'\001'},
    {"STX",		'\002'},
    {"ETX",		'\003'},
    {"EOT",		'\004'},
    {"ENQ",		'\005'},
    {"ACK",		'\006'},
    {"BEL",		'\007'},
    {"alert",		'\007'},
    {"BS",		'\010'},
    {"backspace",	'\b'},
    {"HT",		'\011'},
    {"tab",		'\t'},
    {"LF",		'\012'},
    {"newline",		'\n'},
    {"VT",		'\013'},
    {"vertical-tab",	'\v'},
    {"FF",		'\014'},
    {"form-feed",	'\f'},
    {"CR",		'\015'},
    {"carriage-return",	'\r'},
    {"SO",		'\016'},
    {"SI",		'\017'},
    {"DLE",		'\020'},
    {"DC1",		'\021'},
    {"DC2",		'\022'},
    {"DC3",		'\023'},
    {"DC4",		'\024'},
    {"NAK",		'\025'},
    {"SYN",		'\026'},
    {"ETB",		'\027'},
    {"CAN",		'\030'},
    {"EM",		'\031'},
    {"SUB",		'\032'},
    {"ESC",		'\033'},
    {"IS4",		'\034'},
    {"FS",		'\034'},
    {"IS3",		'\035'},
    {"GS",		'\035'},
    {"IS2",		'\036'},
    {"RS",		'\036'},
    {"IS1",		'\037'},
    {"US",		'\037'},
    {"space",		' '},
    {"exclamation-mark",'!'},
    {"quotation-mark",	'"'},
    {"number-sign",	'#'},
    {"dollar-sign",	'$'},
    {"percent-sign",	'%'},
    {"ampersand",	'&'},
    {"apostrophe",	'\''},
    {"left-parenthesis",'('},
    {"right-parenthesis", ')'},
    {"asterisk",	'*'},
    {"plus-sign",	'+'},
    {"comma",		','},
    {"hyphen",		'-'},
    {"hyphen-minus",	'-'},
    {"period",		'.'},
    {"full-stop",	'.'},
    {"slash",		'/'},
    {"solidus",		'/'},
    {"zero",		'0'},
    {"one",		'1'},
    {"two",		'2'},
    {"three",		'3'},
    {"four",		'4'},
    {"five",		'5'},
    {"six",		'6'},
    {"seven",		'7'},
    {"eight",		'8'},
    {"nine",		'9'},
    {"colon",		':'},
    {"semicolon",	';'},
    {"less-than-sign",	'<'},
    {"equals-sign",	'='},
    {"greater-than-sign", '>'},
    {"question-mark",	'?'},
    {"commercial-at",	'@'},
    {"left-square-bracket", '['},
    {"backslash",	'\\'},
    {"reverse-solidus",	'\\'},
    {"right-square-bracket", ']'},
    {"circumflex",	'^'},
    {"circumflex-accent", '^'},
    {"underscore",	'_'},
    {"low-line",	'_'},
    {"grave-accent",	'`'},
    {"left-brace",	'{'},
    {"left-curly-bracket", '{'},
    {"vertical-line",	'|'},
    {"right-brace",	'}'},
    {"right-curly-bracket", '}'},
    {"tilde",		'~'},
    {"DEL",		'\177'},
    {NULL,		0}
};
118
119/* Unicode character-class tables */
120
/*
 * One contiguous run of code points, used as the element type of the
 * *RangeTable arrays below.
 */
typedef struct crange {
    chr start;		/* first code point of the run */
    chr end;		/* last code point of the run; presumably inclusive
			 * (e.g. {0x0041, 0x005a} spans 'A'..'Z') -- TODO
			 * confirm against the code that consumes the tables */
} crange;
125
126/*
127 *	Declarations of Unicode character ranges.  This code
128 *	is automatically generated by the tools/uniClass.tcl script
129 *	and used in generic/regc_locale.c.  Do not modify by hand.
130 */
131
/*
 * Unicode: alphabetic characters
 *
 * Runs of alphabetic code points, as {start, end} pairs.  Alphabetic code
 * points that do not form a run are listed one by one in alphaCharTable.
 * NUM_ALPHA_RANGE is the entry count, derived from the array size.
 */

static crange alphaRangeTable[] = {
    {0x0041, 0x005a}, {0x0061, 0x007a}, {0x00c0, 0x00d6}, {0x00d8, 0x00f6},
    {0x00f8, 0x021f}, {0x0222, 0x0233}, {0x0250, 0x02ad}, {0x02b0, 0x02b8},
    {0x02bb, 0x02c1}, {0x02e0, 0x02e4}, {0x0388, 0x038a}, {0x038e, 0x03a1},
    {0x03a3, 0x03ce}, {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x0481},
    {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0561, 0x0587},
    {0x05d0, 0x05ea}, {0x05f0, 0x05f2}, {0x0621, 0x063a}, {0x0640, 0x064a},
    {0x0671, 0x06d3}, {0x06fa, 0x06fc}, {0x0712, 0x072c}, {0x0780, 0x07a5},
    {0x0905, 0x0939}, {0x0958, 0x0961}, {0x0985, 0x098c}, {0x0993, 0x09a8},
    {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09df, 0x09e1}, {0x0a05, 0x0a0a},
    {0x0a13, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a59, 0x0a5c}, {0x0a72, 0x0a74},
    {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
    {0x0ab5, 0x0ab9}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b28}, {0x0b2a, 0x0b30},
    {0x0b36, 0x0b39}, {0x0b5f, 0x0b61}, {0x0b85, 0x0b8a}, {0x0b8e, 0x0b90},
    {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5}, {0x0bb7, 0x0bb9},
    {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10}, {0x0c12, 0x0c28}, {0x0c2a, 0x0c33},
    {0x0c35, 0x0c39}, {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8},
    {0x0caa, 0x0cb3}, {0x0cb5, 0x0cb9}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10},
    {0x0d12, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
    {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0e01, 0x0e30}, {0x0e40, 0x0e46},
    {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb0},
    {0x0ec0, 0x0ec4}, {0x0f40, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f88, 0x0f8b},
    {0x1000, 0x1021}, {0x1023, 0x1027}, {0x1050, 0x1055}, {0x10a0, 0x10c5},
    {0x10d0, 0x10f6}, {0x1100, 0x1159}, {0x115f, 0x11a2}, {0x11a8, 0x11f9},
    {0x1200, 0x1206}, {0x1208, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256},
    {0x125a, 0x125d}, {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae},
    {0x12b2, 0x12b5}, {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce},
    {0x12d0, 0x12d6}, {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315},
    {0x1318, 0x131e}, {0x1320, 0x1346}, {0x1348, 0x135a}, {0x13a0, 0x13f4},
    {0x1401, 0x166c}, {0x166f, 0x1676}, {0x1681, 0x169a}, {0x16a0, 0x16ea},
    {0x1780, 0x17b3}, {0x1820, 0x1877}, {0x1880, 0x18a8}, {0x1e00, 0x1e9b},
    {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15}, {0x1f18, 0x1f1d}, {0x1f20, 0x1f45},
    {0x1f48, 0x1f4d}, {0x1f50, 0x1f57}, {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4},
    {0x1fb6, 0x1fbc}, {0x1fc2, 0x1fc4}, {0x1fc6, 0x1fcc}, {0x1fd0, 0x1fd3},
    {0x1fd6, 0x1fdb}, {0x1fe0, 0x1fec}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffc},
    {0x210a, 0x2113}, {0x2119, 0x211d}, {0x212a, 0x212d}, {0x212f, 0x2131},
    {0x2133, 0x2139}, {0x3031, 0x3035}, {0x3041, 0x3094}, {0x30a1, 0x30fa},
    {0x30fc, 0x30fe}, {0x3105, 0x312c}, {0x3131, 0x318e}, {0x31a0, 0x31b7},
    {0x3400, 0x4db5}, {0x4e00, 0x9fa5}, {0xa000, 0xa48c}, {0xac00, 0xd7a3},
    {0xf900, 0xfa2d}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1f, 0xfb28},
    {0xfb2a, 0xfb36}, {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfd3d},
    {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb}, {0xfe70, 0xfe72},
    {0xfe76, 0xfefc}, {0xff21, 0xff3a}, {0xff41, 0xff5a}, {0xff66, 0xffbe},
    {0xffc2, 0xffc7}, {0xffca, 0xffcf}, {0xffd2, 0xffd7}, {0xffda, 0xffdc}
};

#define NUM_ALPHA_RANGE (sizeof(alphaRangeTable)/sizeof(crange))
181
/*
 * Individual alphabetic code points not covered by a run in
 * alphaRangeTable.  NUM_ALPHA_CHAR is the entry count, derived from the
 * array size.
 */
static chr alphaCharTable[] = {
    0x00aa, 0x00b5, 0x00ba, 0x02d0, 0x02d1, 0x02ee, 0x037a, 0x0386, 0x038c,
    0x04c7, 0x04c8, 0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0559, 0x06d5, 0x06e5,
    0x06e6, 0x0710, 0x093d, 0x0950, 0x098f, 0x0990, 0x09b2, 0x09dc, 0x09dd,
    0x09f0, 0x09f1, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38,
    0x0a39, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0abd, 0x0ad0, 0x0ae0, 0x0b0f,
    0x0b10, 0x0b32, 0x0b33, 0x0b3d, 0x0b5c, 0x0b5d, 0x0b99, 0x0b9a, 0x0b9c,
    0x0b9e, 0x0b9f, 0x0ba3, 0x0ba4, 0x0c60, 0x0c61, 0x0cde, 0x0ce0, 0x0ce1,
    0x0d60, 0x0d61, 0x0dbd, 0x0e32, 0x0e33, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
    0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0eb2, 0x0eb3,
    0x0ebd, 0x0ec6, 0x0edc, 0x0edd, 0x0f00, 0x1029, 0x102a, 0x1248, 0x1258,
    0x1288, 0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x1fbe, 0x207f,
    0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x3005, 0x3006, 0x309d,
    0x309e, 0xfb1d, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74, 0xfffe
};

#define NUM_ALPHA_CHAR (sizeof(alphaCharTable)/sizeof(chr))
199
/*
 * Unicode: decimal digit characters
 *
 * Runs of decimal-digit code points, as {start, end} pairs.  There is no
 * companion table of individual digits.  NUM_DIGIT_RANGE is the entry
 * count, derived from the array size.
 */

static crange digitRangeTable[] = {
    {0x0030, 0x0039}, {0x0660, 0x0669}, {0x06f0, 0x06f9}, {0x0966, 0x096f},
    {0x09e6, 0x09ef}, {0x0a66, 0x0a6f}, {0x0ae6, 0x0aef}, {0x0b66, 0x0b6f},
    {0x0be7, 0x0bef}, {0x0c66, 0x0c6f}, {0x0ce6, 0x0cef}, {0x0d66, 0x0d6f},
    {0x0e50, 0x0e59}, {0x0ed0, 0x0ed9}, {0x0f20, 0x0f29}, {0x1040, 0x1049},
    {0x1369, 0x1371}, {0x17e0, 0x17e9}, {0x1810, 0x1819}, {0xff10, 0xff19}
};

#define NUM_DIGIT_RANGE (sizeof(digitRangeTable)/sizeof(crange))
211
212/* no singletons of digit characters */
213
/*
 * Unicode: punctuation characters
 *
 * Runs of punctuation code points, as {start, end} pairs.  Punctuation
 * code points that do not form a run are listed in punctCharTable.
 * NUM_PUNCT_RANGE is the entry count, derived from the array size.
 */

static crange punctRangeTable[] = {
    {0x0021, 0x0023}, {0x0025, 0x002a}, {0x002c, 0x002f}, {0x005b, 0x005d},
    {0x055a, 0x055f}, {0x066a, 0x066d}, {0x0700, 0x070d}, {0x0f04, 0x0f12},
    {0x0f3a, 0x0f3d}, {0x104a, 0x104f}, {0x1361, 0x1368}, {0x16eb, 0x16ed},
    {0x17d4, 0x17da}, {0x1800, 0x180a}, {0x2010, 0x2027}, {0x2030, 0x2043},
    {0x2048, 0x204d}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301f},
    {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe61}, {0xff01, 0xff03},
    {0xff05, 0xff0a}, {0xff0c, 0xff0f}, {0xff3b, 0xff3d}, {0xff61, 0xff65}
};

#define NUM_PUNCT_RANGE (sizeof(punctRangeTable)/sizeof(crange))
227
/*
 * Individual punctuation code points not covered by a run in
 * punctRangeTable.  NUM_PUNCT_CHAR is the entry count, derived from the
 * array size.
 */
static chr punctCharTable[] = {
    0x003a, 0x003b, 0x003f, 0x0040, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00ab,
    0x00ad, 0x00b7, 0x00bb, 0x00bf, 0x037e, 0x0387, 0x0589, 0x058a, 0x05be,
    0x05c0, 0x05c3, 0x05f3, 0x05f4, 0x060c, 0x061b, 0x061f, 0x06d4, 0x0964,
    0x0965, 0x0970, 0x0df4, 0x0e4f, 0x0e5a, 0x0e5b, 0x0f85, 0x10fb, 0x166d,
    0x166e, 0x169b, 0x169c, 0x17dc, 0x2045, 0x2046, 0x207d, 0x207e, 0x208d,
    0x208e, 0x2329, 0x232a, 0x3030, 0x30fb, 0xfd3e, 0xfd3f, 0xfe63, 0xfe68,
    0xfe6a, 0xfe6b, 0xff1a, 0xff1b, 0xff1f, 0xff20, 0xff3f, 0xff5b, 0xff5d
};

#define NUM_PUNCT_CHAR (sizeof(punctCharTable)/sizeof(chr))
239
/*
 * Unicode: white space characters
 *
 * Runs of white-space code points, as {start, end} pairs: the ASCII
 * controls 0x09..0x0d and the block 0x2000..0x200b.  Remaining white-space
 * code points are listed in spaceCharTable.  NUM_SPACE_RANGE is the entry
 * count, derived from the array size.
 */

static crange spaceRangeTable[] = {
    {0x0009, 0x000d}, {0x2000, 0x200b}
};

#define NUM_SPACE_RANGE (sizeof(spaceRangeTable)/sizeof(crange))
247
/*
 * Individual white-space code points not covered by a run in
 * spaceRangeTable (includes the ASCII space, 0x0020).  NUM_SPACE_CHAR is
 * the entry count, derived from the array size.
 */
static chr spaceCharTable[] = {
    0x0020, 0x00a0, 0x1680, 0x2028, 0x2029, 0x202f, 0x3000
};

#define NUM_SPACE_CHAR (sizeof(spaceCharTable)/sizeof(chr))
253
/*
 * Unicode: lowercase characters
 *
 * Runs of lowercase code points, as {start, end} pairs.  Lowercase code
 * points that do not form a run are listed in lowerCharTable.
 * NUM_LOWER_RANGE is the entry count, derived from the array size.
 */

static crange lowerRangeTable[] = {
    {0x0061, 0x007a}, {0x00df, 0x00f6}, {0x00f8, 0x00ff}, {0x017e, 0x0180},
    {0x0199, 0x019b}, {0x01bd, 0x01bf}, {0x0250, 0x02ad}, {0x03ac, 0x03ce},
    {0x03d5, 0x03d7}, {0x03ef, 0x03f3}, {0x0430, 0x045f}, {0x0561, 0x0587},
    {0x1e95, 0x1e9b}, {0x1f00, 0x1f07}, {0x1f10, 0x1f15}, {0x1f20, 0x1f27},
    {0x1f30, 0x1f37}, {0x1f40, 0x1f45}, {0x1f50, 0x1f57}, {0x1f60, 0x1f67},
    {0x1f70, 0x1f7d}, {0x1f80, 0x1f87}, {0x1f90, 0x1f97}, {0x1fa0, 0x1fa7},
    {0x1fb0, 0x1fb4}, {0x1fc2, 0x1fc4}, {0x1fd0, 0x1fd3}, {0x1fe0, 0x1fe7},
    {0x1ff2, 0x1ff4}, {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xff41, 0xff5a}
};

#define NUM_LOWER_RANGE (sizeof(lowerRangeTable)/sizeof(crange))
268
/*
 * Individual lowercase code points not covered by a run in
 * lowerRangeTable.  NUM_LOWER_CHAR is the entry count, derived from the
 * array size.
 */
static chr lowerCharTable[] = {
    0x00aa, 0x00b5, 0x00ba, 0x0101, 0x0103, 0x0105, 0x0107, 0x0109, 0x010b,
    0x010d, 0x010f, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011b, 0x011d,
    0x011f, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012b, 0x012d, 0x012f,
    0x0131, 0x0133, 0x0135, 0x0137, 0x0138, 0x013a, 0x013c, 0x013e, 0x0140,
    0x0142, 0x0144, 0x0146, 0x0148, 0x0149, 0x014b, 0x014d, 0x014f, 0x0151,
    0x0153, 0x0155, 0x0157, 0x0159, 0x015b, 0x015d, 0x015f, 0x0161, 0x0163,
    0x0165, 0x0167, 0x0169, 0x016b, 0x016d, 0x016f, 0x0171, 0x0173, 0x0175,
    0x0177, 0x017a, 0x017c, 0x0183, 0x0185, 0x0188, 0x018c, 0x018d, 0x0192,
    0x0195, 0x019e, 0x01a1, 0x01a3, 0x01a5, 0x01a8, 0x01aa, 0x01ab, 0x01ad,
    0x01b0, 0x01b4, 0x01b6, 0x01b9, 0x01ba, 0x01c6, 0x01c9, 0x01cc, 0x01ce,
    0x01d0, 0x01d2, 0x01d4, 0x01d6, 0x01d8, 0x01da, 0x01dc, 0x01dd, 0x01df,
    0x01e1, 0x01e3, 0x01e5, 0x01e7, 0x01e9, 0x01eb, 0x01ed, 0x01ef, 0x01f0,
    0x01f3, 0x01f5, 0x01f9, 0x01fb, 0x01fd, 0x01ff, 0x0201, 0x0203, 0x0205,
    0x0207, 0x0209, 0x020b, 0x020d, 0x020f, 0x0211, 0x0213, 0x0215, 0x0217,
    0x0219, 0x021b, 0x021d, 0x021f, 0x0223, 0x0225, 0x0227, 0x0229, 0x022b,
    0x022d, 0x022f, 0x0231, 0x0233, 0x0390, 0x03d0, 0x03d1, 0x03db, 0x03dd,
    0x03df, 0x03e1, 0x03e3, 0x03e5, 0x03e7, 0x03e9, 0x03eb, 0x03ed, 0x03f5,
    0x0461, 0x0463, 0x0465, 0x0467, 0x0469, 0x046b, 0x046d, 0x046f, 0x0471,
    0x0473, 0x0475, 0x0477, 0x0479, 0x047b, 0x047d, 0x047f, 0x0481, 0x048d,
    0x048f, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049b, 0x049d, 0x049f,
    0x04a1, 0x04a3, 0x04a5, 0x04a7, 0x04a9, 0x04ab, 0x04ad, 0x04af, 0x04b1,
    0x04b3, 0x04b5, 0x04b7, 0x04b9, 0x04bb, 0x04bd, 0x04bf, 0x04c2, 0x04c4,
    0x04c8, 0x04cc, 0x04d1, 0x04d3, 0x04d5, 0x04d7, 0x04d9, 0x04db, 0x04dd,
    0x04df, 0x04e1, 0x04e3, 0x04e5, 0x04e7, 0x04e9, 0x04eb, 0x04ed, 0x04ef,
    0x04f1, 0x04f3, 0x04f5, 0x04f9, 0x1e01, 0x1e03, 0x1e05, 0x1e07, 0x1e09,
    0x1e0b, 0x1e0d, 0x1e0f, 0x1e11, 0x1e13, 0x1e15, 0x1e17, 0x1e19, 0x1e1b,
    0x1e1d, 0x1e1f, 0x1e21, 0x1e23, 0x1e25, 0x1e27, 0x1e29, 0x1e2b, 0x1e2d,
    0x1e2f, 0x1e31, 0x1e33, 0x1e35, 0x1e37, 0x1e39, 0x1e3b, 0x1e3d, 0x1e3f,
    0x1e41, 0x1e43, 0x1e45, 0x1e47, 0x1e49, 0x1e4b, 0x1e4d, 0x1e4f, 0x1e51,
    0x1e53, 0x1e55, 0x1e57, 0x1e59, 0x1e5b, 0x1e5d, 0x1e5f, 0x1e61, 0x1e63,
    0x1e65, 0x1e67, 0x1e69, 0x1e6b, 0x1e6d, 0x1e6f, 0x1e71, 0x1e73, 0x1e75,
    0x1e77, 0x1e79, 0x1e7b, 0x1e7d, 0x1e7f, 0x1e81, 0x1e83, 0x1e85, 0x1e87,
    0x1e89, 0x1e8b, 0x1e8d, 0x1e8f, 0x1e91, 0x1e93, 0x1ea1, 0x1ea3, 0x1ea5,
    0x1ea7, 0x1ea9, 0x1eab, 0x1ead, 0x1eaf, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eb7,
    0x1eb9, 0x1ebb, 0x1ebd, 0x1ebf, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ec7, 0x1ec9,
    0x1ecb, 0x1ecd, 0x1ecf, 0x1ed1, 0x1ed3, 0x1ed5, 0x1ed7, 0x1ed9, 0x1edb,
    0x1edd, 0x1edf, 0x1ee1, 0x1ee3, 0x1ee5, 0x1ee7, 0x1ee9, 0x1eeb, 0x1eed,
    0x1eef, 0x1ef1, 0x1ef3, 0x1ef5, 0x1ef7, 0x1ef9, 0x1fb6, 0x1fb7, 0x1fbe,
    0x1fc6, 0x1fc7, 0x1fd6, 0x1fd7, 0x1ff6, 0x1ff7, 0x207f, 0x210a, 0x210e,
    0x210f, 0x2113, 0x212f, 0x2134, 0x2139
};

#define NUM_LOWER_CHAR (sizeof(lowerCharTable)/sizeof(chr))
313
/*
 * Unicode: uppercase characters
 *
 * Runs of uppercase code points, as {start, end} pairs.  Uppercase code
 * points that do not form a run are listed in upperCharTable.
 * NUM_UPPER_RANGE is the entry count, derived from the array size.
 */

static crange upperRangeTable[] = {
    {0x0041, 0x005a}, {0x00c0, 0x00d6}, {0x00d8, 0x00de}, {0x0189, 0x018b},
    {0x018e, 0x0191}, {0x0196, 0x0198}, {0x01b1, 0x01b3}, {0x01f6, 0x01f8},
    {0x0388, 0x038a}, {0x0391, 0x03a1}, {0x03a3, 0x03ab}, {0x03d2, 0x03d4},
    {0x0400, 0x042f}, {0x0531, 0x0556}, {0x10a0, 0x10c5}, {0x1f08, 0x1f0f},
    {0x1f18, 0x1f1d}, {0x1f28, 0x1f2f}, {0x1f38, 0x1f3f}, {0x1f48, 0x1f4d},
    {0x1f68, 0x1f6f}, {0x1fb8, 0x1fbb}, {0x1fc8, 0x1fcb}, {0x1fd8, 0x1fdb},
    {0x1fe8, 0x1fec}, {0x1ff8, 0x1ffb}, {0x210b, 0x210d}, {0x2110, 0x2112},
    {0x2119, 0x211d}, {0x212a, 0x212d}, {0xff21, 0xff3a}
};

#define NUM_UPPER_RANGE (sizeof(upperRangeTable)/sizeof(crange))
328
/*
 * Individual uppercase code points not covered by a run in
 * upperRangeTable.  NUM_UPPER_CHAR is the entry count, derived from the
 * array size.
 */
static chr upperCharTable[] = {
    0x0100, 0x0102, 0x0104, 0x0106, 0x0108, 0x010a, 0x010c, 0x010e, 0x0110,
    0x0112, 0x0114, 0x0116, 0x0118, 0x011a, 0x011c, 0x011e, 0x0120, 0x0122,
    0x0124, 0x0126, 0x0128, 0x012a, 0x012c, 0x012e, 0x0130, 0x0132, 0x0134,
    0x0136, 0x0139, 0x013b, 0x013d, 0x013f, 0x0141, 0x0143, 0x0145, 0x0147,
    0x014a, 0x014c, 0x014e, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158, 0x015a,
    0x015c, 0x015e, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016a, 0x016c,
    0x016e, 0x0170, 0x0172, 0x0174, 0x0176, 0x0178, 0x0179, 0x017b, 0x017d,
    0x0181, 0x0182, 0x0184, 0x0186, 0x0187, 0x0193, 0x0194, 0x019c, 0x019d,
    0x019f, 0x01a0, 0x01a2, 0x01a4, 0x01a6, 0x01a7, 0x01a9, 0x01ac, 0x01ae,
    0x01af, 0x01b5, 0x01b7, 0x01b8, 0x01bc, 0x01c4, 0x01c7, 0x01ca, 0x01cd,
    0x01cf, 0x01d1, 0x01d3, 0x01d5, 0x01d7, 0x01d9, 0x01db, 0x01de, 0x01e0,
    0x01e2, 0x01e4, 0x01e6, 0x01e8, 0x01ea, 0x01ec, 0x01ee, 0x01f1, 0x01f4,
    0x01fa, 0x01fc, 0x01fe, 0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020a,
    0x020c, 0x020e, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218, 0x021a, 0x021c,
    0x021e, 0x0222, 0x0224, 0x0226, 0x0228, 0x022a, 0x022c, 0x022e, 0x0230,
    0x0232, 0x0386, 0x038c, 0x038e, 0x038f, 0x03da, 0x03dc, 0x03de, 0x03e0,
    0x03e2, 0x03e4, 0x03e6, 0x03e8, 0x03ea, 0x03ec, 0x03ee, 0x03f4, 0x0460,
    0x0462, 0x0464, 0x0466, 0x0468, 0x046a, 0x046c, 0x046e, 0x0470, 0x0472,
    0x0474, 0x0476, 0x0478, 0x047a, 0x047c, 0x047e, 0x0480, 0x048c, 0x048e,
    0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049a, 0x049c, 0x049e, 0x04a0,
    0x04a2, 0x04a4, 0x04a6, 0x04a8, 0x04aa, 0x04ac, 0x04ae, 0x04b0, 0x04b2,
    0x04b4, 0x04b6, 0x04b8, 0x04ba, 0x04bc, 0x04be, 0x04c0, 0x04c1, 0x04c3,
    0x04c7, 0x04cb, 0x04d0, 0x04d2, 0x04d4, 0x04d6, 0x04d8, 0x04da, 0x04dc,
    0x04de, 0x04e0, 0x04e2, 0x04e4, 0x04e6, 0x04e8, 0x04ea, 0x04ec, 0x04ee,
    0x04f0, 0x04f2, 0x04f4, 0x04f8, 0x1e00, 0x1e02, 0x1e04, 0x1e06, 0x1e08,
    0x1e0a, 0x1e0c, 0x1e0e, 0x1e10, 0x1e12, 0x1e14, 0x1e16, 0x1e18, 0x1e1a,
    0x1e1c, 0x1e1e, 0x1e20, 0x1e22, 0x1e24, 0x1e26, 0x1e28, 0x1e2a, 0x1e2c,
    0x1e2e, 0x1e30, 0x1e32, 0x1e34, 0x1e36, 0x1e38, 0x1e3a, 0x1e3c, 0x1e3e,
    0x1e40, 0x1e42, 0x1e44, 0x1e46, 0x1e48, 0x1e4a, 0x1e4c, 0x1e4e, 0x1e50,
    0x1e52, 0x1e54, 0x1e56, 0x1e58, 0x1e5a, 0x1e5c, 0x1e5e, 0x1e60, 0x1e62,
    0x1e64, 0x1e66, 0x1e68, 0x1e6a, 0x1e6c, 0x1e6e, 0x1e70, 0x1e72, 0x1e74,
    0x1e76, 0x1e78, 0x1e7a, 0x1e7c, 0x1e7e, 0x1e80, 0x1e82, 0x1e84, 0x1e86,
    0x1e88, 0x1e8a, 0x1e8c, 0x1e8e, 0x1e90, 0x1e92, 0x1e94, 0x1ea0, 0x1ea2,
    0x1ea4, 0x1ea6, 0x1ea8, 0x1eaa, 0x1eac, 0x1eae, 0x1eb0, 0x1eb2, 0x1eb4,
    0x1eb6, 0x1eb8, 0x1eba, 0x1ebc, 0x1ebe, 0x1ec0, 0x1ec2, 0x1ec4, 0x1ec6,
    0x1ec8, 0x1eca, 0x1ecc, 0x1ece, 0x1ed0, 0x1ed2, 0x1ed4, 0x1ed6, 0x1ed8,
    0x1eda, 0x1edc, 0x1ede, 0x1ee0, 0x1ee2, 0x1ee4, 0x1ee6, 0x1ee8, 0x1eea,
    0x1eec, 0x1eee, 0x1ef0, 0x1ef2, 0x1ef4, 0x1ef6, 0x1ef8, 0x1f59, 0x1f5b,
    0x1f5d, 0x1f5f, 0x2102, 0x2107, 0x2115, 0x2124, 0x2126, 0x2128, 0x2130,
    0x2131, 0x2133
};

#define NUM_UPPER_CHAR (sizeof(upperCharTable)/sizeof(chr))
373
374/* Unicode: unicode print characters excluding space */
375
376static crange graphRangeTable[] = {
377    {0x0021, 0x007e}, {0x00a0, 0x011f}, {0x0121, 0x021f}, {0x0222, 0x0233},
378    {0x0250, 0x02ad}, {0x02b0, 0x02ee}, {0x0300, 0x031f}, {0x0321, 0x034e},
379    {0x0360, 0x0362}, {0x0384, 0x038a}, {0x038e, 0x03a1}, {0x03a3, 0x03ce},
380    {0x03d0, 0x03d7}, {0x03da, 0x03f5}, {0x0400, 0x041f}, {0x0421, 0x0486},
381    {0x048c, 0x04c4}, {0x04d0, 0x04f5}, {0x0531, 0x0556}, {0x0559, 0x055f},
382    {0x0561, 0x0587}, {0x0591, 0x05a1}, {0x05a3, 0x05b9}, {0x05bb, 0x05c4},
383    {0x05d0, 0x05ea}, {0x05f0, 0x05f4}, {0x0621, 0x063a}, {0x0640, 0x0655},
384    {0x0660, 0x066d}, {0x0670, 0x06ed}, {0x06f0, 0x06fe}, {0x0700, 0x070d},
385    {0x0710, 0x071f}, {0x0721, 0x072c}, {0x0730, 0x074a}, {0x0780, 0x07b0},
386    {0x0901, 0x0903}, {0x0905, 0x091f}, {0x0921, 0x0939}, {0x093c, 0x094d},
387    {0x0950, 0x0954}, {0x0958, 0x0970}, {0x0981, 0x0983}, {0x0985, 0x098c},
388    {0x0993, 0x09a8}, {0x09aa, 0x09b0}, {0x09b6, 0x09b9}, {0x09be, 0x09c4},
389    {0x09cb, 0x09cd}, {0x09df, 0x09e3}, {0x09e6, 0x09fa}, {0x0a05, 0x0a0a},
390    {0x0a13, 0x0a1f}, {0x0a21, 0x0a28}, {0x0a2a, 0x0a30}, {0x0a3e, 0x0a42},
391    {0x0a4b, 0x0a4d}, {0x0a59, 0x0a5c}, {0x0a66, 0x0a74}, {0x0a81, 0x0a83},
392    {0x0a85, 0x0a8b}, {0x0a8f, 0x0a91}, {0x0a93, 0x0aa8}, {0x0aaa, 0x0ab0},
393    {0x0ab5, 0x0ab9}, {0x0abc, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd},
394    {0x0ae6, 0x0aef}, {0x0b01, 0x0b03}, {0x0b05, 0x0b0c}, {0x0b13, 0x0b1f},
395    {0x0b21, 0x0b28}, {0x0b2a, 0x0b30}, {0x0b36, 0x0b39}, {0x0b3c, 0x0b43},
396    {0x0b4b, 0x0b4d}, {0x0b5f, 0x0b61}, {0x0b66, 0x0b70}, {0x0b85, 0x0b8a},
397    {0x0b8e, 0x0b90}, {0x0b92, 0x0b95}, {0x0ba8, 0x0baa}, {0x0bae, 0x0bb5},
398    {0x0bb7, 0x0bb9}, {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd},
399    {0x0be7, 0x0bf2}, {0x0c01, 0x0c03}, {0x0c05, 0x0c0c}, {0x0c0e, 0x0c10},
400    {0x0c12, 0x0c1f}, {0x0c21, 0x0c28}, {0x0c2a, 0x0c33}, {0x0c35, 0x0c39},
401    {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d}, {0x0c66, 0x0c6f},
402    {0x0c85, 0x0c8c}, {0x0c8e, 0x0c90}, {0x0c92, 0x0ca8}, {0x0caa, 0x0cb3},
403    {0x0cb5, 0x0cb9}, {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd},
404    {0x0ce6, 0x0cef}, {0x0d05, 0x0d0c}, {0x0d0e, 0x0d10}, {0x0d12, 0x0d1f},
405    {0x0d21, 0x0d28}, {0x0d2a, 0x0d39}, {0x0d3e, 0x0d43}, {0x0d46, 0x0d48},
406    {0x0d4a, 0x0d4d}, {0x0d66, 0x0d6f}, {0x0d85, 0x0d96}, {0x0d9a, 0x0db1},
407    {0x0db3, 0x0dbb}, {0x0dc0, 0x0dc6}, {0x0dcf, 0x0dd4}, {0x0dd8, 0x0ddf},
408    {0x0df2, 0x0df4}, {0x0e01, 0x0e1f}, {0x0e21, 0x0e3a}, {0x0e3f, 0x0e5b},
409    {0x0e94, 0x0e97}, {0x0e99, 0x0e9f}, {0x0ea1, 0x0ea3}, {0x0ead, 0x0eb9},
410    {0x0ebb, 0x0ebd}, {0x0ec0, 0x0ec4}, {0x0ec8, 0x0ecd}, {0x0ed0, 0x0ed9},
411    {0x0f00, 0x0f1f}, {0x0f21, 0x0f47}, {0x0f49, 0x0f6a}, {0x0f71, 0x0f8b},
412    {0x0f90, 0x0f97}, {0x0f99, 0x0fbc}, {0x0fbe, 0x0fcc}, {0x1000, 0x101f},
413    {0x1023, 0x1027}, {0x102c, 0x1032}, {0x1036, 0x1039}, {0x1040, 0x1059},
414    {0x10a0, 0x10c5}, {0x10d0, 0x10f6}, {0x1100, 0x111f}, {0x1121, 0x1159},
415    {0x115f, 0x11a2}, {0x11a8, 0x11f9}, {0x1200, 0x1206}, {0x1208, 0x121f},
416    {0x1221, 0x1246}, {0x124a, 0x124d}, {0x1250, 0x1256}, {0x125a, 0x125d},
417    {0x1260, 0x1286}, {0x128a, 0x128d}, {0x1290, 0x12ae}, {0x12b2, 0x12b5},
418    {0x12b8, 0x12be}, {0x12c2, 0x12c5}, {0x12c8, 0x12ce}, {0x12d0, 0x12d6},
419    {0x12d8, 0x12ee}, {0x12f0, 0x130e}, {0x1312, 0x1315}, {0x1318, 0x131e},
420    {0x1321, 0x1346}, {0x1348, 0x135a}, {0x1361, 0x137c}, {0x13a0, 0x13f4},
421    {0x1401, 0x141f}, {0x1421, 0x151f}, {0x1521, 0x161f}, {0x1621, 0x1676},
422    {0x1680, 0x169c}, {0x16a0, 0x16f0}, {0x1780, 0x17dc}, {0x17e0, 0x17e9},
423    {0x1800, 0x180a}, {0x1810, 0x1819}, {0x1821, 0x1877}, {0x1880, 0x18a9},
424    {0x1e00, 0x1e1f}, {0x1e21, 0x1e9b}, {0x1ea0, 0x1ef9}, {0x1f00, 0x1f15},
425    {0x1f18, 0x1f1d}, {0x1f21, 0x1f45}, {0x1f48, 0x1f4d}, {0x1f50, 0x1f57},
426    {0x1f5f, 0x1f7d}, {0x1f80, 0x1fb4}, {0x1fb6, 0x1fc4}, {0x1fc6, 0x1fd3},
427    {0x1fd6, 0x1fdb}, {0x1fdd, 0x1fef}, {0x1ff2, 0x1ff4}, {0x1ff6, 0x1ffe},
428    {0x2000, 0x200b}, {0x2010, 0x201f}, {0x2021, 0x2029}, {0x202f, 0x2046},
429    {0x2048, 0x204d}, {0x2074, 0x208e}, {0x20a0, 0x20af}, {0x20d0, 0x20e3},
430    {0x2100, 0x211f}, {0x2121, 0x213a}, {0x2153, 0x2183}, {0x2190, 0x21f3},
431    {0x2200, 0x221f}, {0x2221, 0x22f1}, {0x2300, 0x231f}, {0x2321, 0x237b},
432    {0x237d, 0x239a}, {0x2400, 0x241f}, {0x2421, 0x2426}, {0x2440, 0x244a},
433    {0x2460, 0x24ea}, {0x2500, 0x251f}, {0x2521, 0x2595}, {0x25a0, 0x25f7},
434    {0x2600, 0x2613}, {0x2619, 0x261f}, {0x2621, 0x2671}, {0x2701, 0x2704},
435    {0x2706, 0x2709}, {0x270c, 0x271f}, {0x2721, 0x2727}, {0x2729, 0x274b},
436    {0x274f, 0x2752}, {0x2758, 0x275e}, {0x2761, 0x2767}, {0x2776, 0x2794},
437    {0x2798, 0x27af}, {0x27b1, 0x27be}, {0x2800, 0x281f}, {0x2821, 0x28ff},
438    {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2f1f}, {0x2f21, 0x2fd5},
439    {0x2ff0, 0x2ffb}, {0x3000, 0x301f}, {0x3021, 0x303a}, {0x3041, 0x3094},
440    {0x3099, 0x309e}, {0x30a1, 0x30fe}, {0x3105, 0x311f}, {0x3121, 0x312c},
441    {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x3200, 0x321c}, {0x3221, 0x3243},
442    {0x3260, 0x327b}, {0x327f, 0x32b0}, {0x32c0, 0x32cb}, {0x32d0, 0x32fe},
443    {0x3300, 0x331f}, {0x3321, 0x3376}, {0x337b, 0x33dd}, {0x33e0, 0x33fe},
444    {0x3400, 0x341f}, {0x3421, 0x351f}, {0x3521, 0x361f}, {0x3621, 0x371f},
445    {0x3721, 0x381f}, {0x3821, 0x391f}, {0x3921, 0x3a1f}, {0x3a21, 0x3b1f},
446    {0x3b21, 0x3c1f}, {0x3c21, 0x3d1f}, {0x3d21, 0x3e1f}, {0x3e21, 0x3f1f},
447    {0x3f21, 0x401f}, {0x4021, 0x411f}, {0x4121, 0x421f}, {0x4221, 0x431f},
448    {0x4321, 0x441f}, {0x4421, 0x451f}, {0x4521, 0x461f}, {0x4621, 0x471f},
449    {0x4721, 0x481f}, {0x4821, 0x491f}, {0x4921, 0x4a1f}, {0x4a21, 0x4b1f},
450    {0x4b21, 0x4c1f}, {0x4c21, 0x4d1f}, {0x4d21, 0x4db5}, {0x4e00, 0x4e1f},
451    {0x4e21, 0x4f1f}, {0x4f21, 0x501f}, {0x5021, 0x511f}, {0x5121, 0x521f},
452    {0x5221, 0x531f}, {0x5321, 0x541f}, {0x5421, 0x551f}, {0x5521, 0x561f},
453    {0x5621, 0x571f}, {0x5721, 0x581f}, {0x5821, 0x591f}, {0x5921, 0x5a1f},
454    {0x5a21, 0x5b1f}, {0x5b21, 0x5c1f}, {0x5c21, 0x5d1f}, {0x5d21, 0x5e1f},
455    {0x5e21, 0x5f1f}, {0x5f21, 0x601f}, {0x6021, 0x611f}, {0x6121, 0x621f},
456    {0x6221, 0x631f}, {0x6321, 0x641f}, {0x6421, 0x651f}, {0x6521, 0x661f},
457    {0x6621, 0x671f}, {0x6721, 0x681f}, {0x6821, 0x691f}, {0x6921, 0x6a1f},
458    {0x6a21, 0x6b1f}, {0x6b21, 0x6c1f}, {0x6c21, 0x6d1f}, {0x6d21, 0x6e1f},
459    {0x6e21, 0x6f1f}, {0x6f21, 0x701f}, {0x7021, 0x711f}, {0x7121, 0x721f},
460    {0x7221, 0x731f}, {0x7321, 0x741f}, {0x7421, 0x751f}, {0x7521, 0x761f},
461    {0x7621, 0x771f}, {0x7721, 0x781f}, {0x7821, 0x791f}, {0x7921, 0x7a1f},
462    {0x7a21, 0x7b1f}, {0x7b21, 0x7c1f}, {0x7c21, 0x7d1f}, {0x7d21, 0x7e1f},
463    {0x7e21, 0x7f1f}, {0x7f21, 0x801f}, {0x8021, 0x811f}, {0x8121, 0x821f},
464    {0x8221, 0x831f}, {0x8321, 0x841f}, {0x8421, 0x851f}, {0x8521, 0x861f},
465    {0x8621, 0x871f}, {0x8721, 0x881f}, {0x8821, 0x891f}, {0x8921, 0x8a1f},
466    {0x8a21, 0x8b1f}, {0x8b21, 0x8c1f}, {0x8c21, 0x8d1f}, {0x8d21, 0x8e1f},
467    {0x8e21, 0x8f1f}, {0x8f21, 0x901f}, {0x9021, 0x911f}, {0x9121, 0x921f},
468    {0x9221, 0x931f}, {0x9321, 0x941f}, {0x9421, 0x951f}, {0x9521, 0x961f},
469    {0x9621, 0x971f}, {0x9721, 0x981f}, {0x9821, 0x991f}, {0x9921, 0x9a1f},
470    {0x9a21, 0x9b1f}, {0x9b21, 0x9c1f}, {0x9c21, 0x9d1f}, {0x9d21, 0x9e1f},
471    {0x9e21, 0x9f1f}, {0x9f21, 0x9fa5}, {0xa000, 0xa01f}, {0xa021, 0xa11f},
472    {0xa121, 0xa21f}, {0xa221, 0xa31f}, {0xa321, 0xa41f}, {0xa421, 0xa48c},
473    {0xa490, 0xa4a1}, {0xa4a4, 0xa4b3}, {0xa4b5, 0xa4c0}, {0xa4c2, 0xa4c4},
474    {0xac00, 0xac1f}, {0xac21, 0xad1f}, {0xad21, 0xae1f}, {0xae21, 0xaf1f},
475    {0xaf21, 0xb01f}, {0xb021, 0xb11f}, {0xb121, 0xb21f}, {0xb221, 0xb31f},
476    {0xb321, 0xb41f}, {0xb421, 0xb51f}, {0xb521, 0xb61f}, {0xb621, 0xb71f},
477    {0xb721, 0xb81f}, {0xb821, 0xb91f}, {0xb921, 0xba1f}, {0xba21, 0xbb1f},
478    {0xbb21, 0xbc1f}, {0xbc21, 0xbd1f}, {0xbd21, 0xbe1f}, {0xbe21, 0xbf1f},
479    {0xbf21, 0xc01f}, {0xc021, 0xc11f}, {0xc121, 0xc21f}, {0xc221, 0xc31f},
480    {0xc321, 0xc41f}, {0xc421, 0xc51f}, {0xc521, 0xc61f}, {0xc621, 0xc71f},
481    {0xc721, 0xc81f}, {0xc821, 0xc91f}, {0xc921, 0xca1f}, {0xca21, 0xcb1f},
482    {0xcb21, 0xcc1f}, {0xcc21, 0xcd1f}, {0xcd21, 0xce1f}, {0xce21, 0xcf1f},
483    {0xcf21, 0xd01f}, {0xd021, 0xd11f}, {0xd121, 0xd21f}, {0xd221, 0xd31f},
484    {0xd321, 0xd41f}, {0xd421, 0xd51f}, {0xd521, 0xd61f}, {0xd621, 0xd71f},
485    {0xd721, 0xd7a3}, {0xf900, 0xf91f}, {0xf921, 0xfa1f}, {0xfa21, 0xfa2d},
486    {0xfb00, 0xfb06}, {0xfb13, 0xfb17}, {0xfb1d, 0xfb1f}, {0xfb21, 0xfb36},
487    {0xfb38, 0xfb3c}, {0xfb46, 0xfbb1}, {0xfbd3, 0xfc1f}, {0xfc21, 0xfd1f},
488    {0xfd21, 0xfd3f}, {0xfd50, 0xfd8f}, {0xfd92, 0xfdc7}, {0xfdf0, 0xfdfb},
489    {0xfe21, 0xfe23}, {0xfe30, 0xfe44}, {0xfe49, 0xfe52}, {0xfe54, 0xfe66},
490    {0xfe68, 0xfe6b}, {0xfe70, 0xfe72}, {0xfe76, 0xfefc}, {0xff01, 0xff1f},
491    {0xff21, 0xff5e}, {0xff61, 0xffbe}, {0xffc2, 0xffc7}, {0xffca, 0xffcf},
492    {0xffd2, 0xffd7}, {0xffda, 0xffdc}, {0xffe0, 0xffe6}, {0xffe8, 0xffee},
493    {0xfffc, 0xffff}
494};
495
496#define NUM_GRAPH_RANGE (sizeof(graphRangeTable)/sizeof(crange))
497
/*
 * Individual graphic code points not covered by a run in graphRangeTable.
 * NUM_GRAPH_CHAR is the entry count, derived from the array size.
 */
static chr graphCharTable[] = {
    0x0374, 0x0375, 0x037a, 0x037e, 0x038c, 0x0488, 0x0489, 0x04c7, 0x04c8,
    0x04cb, 0x04cc, 0x04f8, 0x04f9, 0x0589, 0x058a, 0x060c, 0x061b, 0x061f,
    0x098f, 0x0990, 0x09b2, 0x09bc, 0x09c7, 0x09c8, 0x09d7, 0x09dc, 0x09dd,
    0x0a02, 0x0a0f, 0x0a10, 0x0a32, 0x0a33, 0x0a35, 0x0a36, 0x0a38, 0x0a39,
    0x0a3c, 0x0a47, 0x0a48, 0x0a5e, 0x0a8d, 0x0ab2, 0x0ab3, 0x0ad0, 0x0ae0,
    0x0b0f, 0x0b10, 0x0b32, 0x0b33, 0x0b47, 0x0b48, 0x0b56, 0x0b57, 0x0b5c,
    0x0b5d, 0x0b82, 0x0b83, 0x0b99, 0x0b9a, 0x0b9c, 0x0b9e, 0x0b9f, 0x0ba3,
    0x0ba4, 0x0bd7, 0x0c55, 0x0c56, 0x0c60, 0x0c61, 0x0c82, 0x0c83, 0x0cd5,
    0x0cd6, 0x0cde, 0x0ce0, 0x0ce1, 0x0d02, 0x0d03, 0x0d57, 0x0d60, 0x0d61,
    0x0d82, 0x0d83, 0x0dbd, 0x0dca, 0x0dd6, 0x0e81, 0x0e82, 0x0e84, 0x0e87,
    0x0e88, 0x0e8a, 0x0e8d, 0x0ea5, 0x0ea7, 0x0eaa, 0x0eab, 0x0ec6, 0x0edc,
    0x0edd, 0x0fcf, 0x1021, 0x1029, 0x102a, 0x10fb, 0x1248, 0x1258, 0x1288,
    0x12b0, 0x12c0, 0x1310, 0x1f59, 0x1f5b, 0x1f5d, 0x2070, 0x274d, 0x2756,
    0x303e, 0x303f, 0xa4c6, 0xfb3e, 0xfb40, 0xfb41, 0xfb43, 0xfb44, 0xfe74
};

#define NUM_GRAPH_CHAR (sizeof(graphCharTable)/sizeof(chr))
516
517/*
518 *	End of auto-generated Unicode character ranges declarations.
519 */
520
/*
 * CH is shorthand for NOCELT, which is not defined in the portion of the
 * file shown here.  NOTE(review): nothing in the reviewed portion refers to
 * CH -- confirm whether later code still needs it before removing it.
 */
#define	CH	NOCELT
522
523/*
524 - nmcces - how many distinct MCCEs are there?
525 ^ static int nmcces(struct vars *);
526 */
static int
nmcces(v)
    struct vars *v;			/* context (unused) */
{
    /*
     * This locale defines no multi-character collating elements, so the
     * count is a constant zero whatever the context.
     */
    int count = 0;

    return count;
}
536
537/*
538 - nleaders - how many chrs can be first chrs of MCCEs?
539 ^ static int nleaders(struct vars *);
540 */
static int
nleaders(v)
    struct vars *v;			/* context (unused) */
{
    /*
     * Always reports zero leading chrs, matching the zero MCCE count that
     * nmcces() reports.
     */
    int leaders = 0;

    return leaders;
}
547
548/*
549 - allmcces - return a cvec with all the MCCEs of the locale
550 ^ static struct cvec *allmcces(struct vars *, struct cvec *);
551 */
static struct cvec *
allmcces(v, cv)
    struct vars *v;			/* context (unused) */
    struct cvec *cv;			/* this is supposed to have enough room */
{
    struct cvec *result;

    /*
     * Nothing is added to the vector: it is handed to clearcvec() and
     * whatever that returns is passed straight back to the caller.
     */
    result = clearcvec(cv);
    return result;
}
559
560/*
561 - element - map collating-element name to celt
562 ^ static celt element(struct vars *, chr *, chr *);
563 */
564static celt
565element(v, startp, endp)
566    struct vars *v;			/* context */
567    chr *startp;			/* points to start of name */
568    chr *endp;				/* points just past end of name */
569{
570    struct cname *cn;
571    size_t len;
572    Tcl_DString ds;
573    CONST char *np;
574
575    /* generic:  one-chr names stand for themselves */
576    assert(startp < endp);
577    len = endp - startp;
578    if (len == 1) {
579	return *startp;
580    }
581
582    NOTE(REG_ULOCALE);
583
584    /* search table */
585    Tcl_DStringInit(&ds);
586    np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
587    for (cn=cnames; cn->name!=NULL; cn++) {
588	if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
589	    break;			/* NOTE BREAK OUT */
590	}
591    }
592    Tcl_DStringFree(&ds);
593    if (cn->name != NULL) {
594	return CHR(cn->code);
595    }
596
597    /* couldn't find it */
598    ERR(REG_ECOLLATE);
599    return 0;
600}
601
602/*
603 - range - supply cvec for a range, including legality check
604 ^ static struct cvec *range(struct vars *, celt, celt, int);
605 */
606static struct cvec *
607range(v, a, b, cases)
608    struct vars *v;			/* context */
609    celt a;				/* range start */
610    celt b;				/* range end, might equal a */
611    int cases;				/* case-independent? */
612{
613    int nchrs;
614    struct cvec *cv;
615    celt c, lc, uc, tc;
616
617    if (a != b && !before(a, b)) {
618	ERR(REG_ERANGE);
619	return NULL;
620    }
621
622    if (!cases) {			/* easy version */
623	cv = getcvec(v, 0, 1, 0);
624	NOERRN();
625	addrange(cv, a, b);
626	return cv;
627    }
628
629    /*
630     * When case-independent, it's hard to decide when cvec ranges are
631     * usable, so for now at least, we won't try.  We allocate enough
632     * space for two case variants plus a little extra for the two
633     * title case variants.
634     */
635
636    nchrs = (b - a + 1)*2 + 4;
637
638    cv = getcvec(v, nchrs, 0, 0);
639    NOERRN();
640
641    for (c=a; c<=b; c++) {
642	addchr(cv, c);
643	lc = Tcl_UniCharToLower((chr)c);
644	uc = Tcl_UniCharToUpper((chr)c);
645	tc = Tcl_UniCharToTitle((chr)c);
646	if (c != lc) {
647	    addchr(cv, lc);
648	}
649	if (c != uc) {
650	    addchr(cv, uc);
651	}
652	if (c != tc && tc != uc) {
653	    addchr(cv, tc);
654	}
655    }
656
657    return cv;
658}
659
660/*
661 - before - is celt x before celt y, for purposes of range legality?
662 ^ static int before(celt, celt);
663 */
664static int				/* predicate */
665before(x, y)
666    celt x, y;				/* collating elements */
667{
668    /* trivial because no MCCEs */
669    if (x < y) {
670	return 1;
671    }
672    return 0;
673}
674
675/*
676 - eclass - supply cvec for an equivalence class
677 * Must include case counterparts on request.
678 ^ static struct cvec *eclass(struct vars *, celt, int);
679 */
680static struct cvec *
681eclass(v, c, cases)
682    struct vars *v;			/* context */
683    celt c;				/* Collating element representing
684					 * the equivalence class. */
685    int cases;				/* all cases? */
686{
687    struct cvec *cv;
688
689    /* crude fake equivalence class for testing */
690    if ((v->cflags&REG_FAKE) && c == 'x') {
691	cv = getcvec(v, 4, 0, 0);
692	addchr(cv, (chr)'x');
693	addchr(cv, (chr)'y');
694	if (cases) {
695	    addchr(cv, (chr)'X');
696	    addchr(cv, (chr)'Y');
697	}
698	return cv;
699    }
700
701    /* otherwise, none */
702    if (cases) {
703	return allcases(v, c);
704    }
705    cv = getcvec(v, 1, 0, 0);
706    assert(cv != NULL);
707    addchr(cv, (chr)c);
708    return cv;
709}
710
711/*
712 - cclass - supply cvec for a character class
713 * Must include case counterparts on request.
714 ^ static struct cvec *cclass(struct vars *, chr *, chr *, int);
715 */
716static struct cvec *
717cclass(v, startp, endp, cases)
718    struct vars *v;			/* context */
719    chr *startp;			/* where the name starts */
720    chr *endp;				/* just past the end of the name */
721    int cases;				/* case-independent? */
722{
723    size_t len;
724    struct cvec *cv = NULL;
725    Tcl_DString ds;
726    CONST char *np;
727    char **namePtr;
728    int i, index;
729
730    /*
731     * The following arrays define the valid character class names.
732     */
733
734    static char *classNames[] = {
735	"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
736	"lower", "print", "punct", "space", "upper", "xdigit", NULL
737    };
738
739    enum classes {
740	CC_ALNUM, CC_ALPHA, CC_ASCII, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
741	CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_XDIGIT
742    };
743
744
745    /*
746     * Extract the class name
747     */
748
749    len = endp - startp;
750    Tcl_DStringInit(&ds);
751    np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
752
753    /*
754     * Remap lower and upper to alpha if the match is case insensitive.
755     */
756
757    if (cases && len == 5 && (strncmp("lower", np, 5) == 0
758	    || strncmp("upper", np, 5) == 0)) {
759	np = "alpha";
760    }
761
762    /*
763     * Map the name to the corresponding enumerated value.
764     */
765
766    index = -1;
767    for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
768	if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
769	    index = i;
770	    break;
771	}
772    }
773    Tcl_DStringInit(&ds);
774    if (index == -1) {
775	ERR(REG_ECTYPE);
776	return NULL;
777    }
778
779    /*
780     * Now compute the character class contents.
781     */
782
783    switch((enum classes) index) {
784    case CC_PRINT:
785    case CC_ALNUM:
786	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_DIGIT_RANGE + NUM_ALPHA_RANGE, 0);
787	if (cv) {
788	    for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
789		addchr(cv, alphaCharTable[i]);
790	    }
791	    for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
792		addrange(cv, alphaRangeTable[i].start,
793			alphaRangeTable[i].end);
794	    }
795	    for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
796		addrange(cv, digitRangeTable[i].start,
797			digitRangeTable[i].end);
798	    }
799	}
800	break;
801    case CC_ALPHA:
802	cv = getcvec(v, NUM_ALPHA_CHAR, NUM_ALPHA_RANGE, 0);
803	if (cv) {
804	    for (i=0 ; i<NUM_ALPHA_RANGE ; i++) {
805		addrange(cv, alphaRangeTable[i].start,
806			alphaRangeTable[i].end);
807	    }
808	    for (i=0 ; i<NUM_ALPHA_CHAR ; i++) {
809		addchr(cv, alphaCharTable[i]);
810	    }
811	}
812	break;
813    case CC_ASCII:
814	cv = getcvec(v, 0, 1, 0);
815	if (cv) {
816	    addrange(cv, 0, 0x7f);
817	}
818	break;
819    case CC_BLANK:
820	cv = getcvec(v, 2, 0, 0);
821	addchr(cv, '\t');
822	addchr(cv, ' ');
823	break;
824    case CC_CNTRL:
825	cv = getcvec(v, 0, 2, 0);
826	addrange(cv, 0x0, 0x1f);
827	addrange(cv, 0x7f, 0x9f);
828	break;
829    case CC_DIGIT:
830	cv = getcvec(v, 0, NUM_DIGIT_RANGE, 0);
831	if (cv) {
832	    for (i=0 ; i<NUM_DIGIT_RANGE ; i++) {
833		addrange(cv, digitRangeTable[i].start,
834			digitRangeTable[i].end);
835	    }
836	}
837	break;
838    case CC_PUNCT:
839	cv = getcvec(v, NUM_PUNCT_CHAR, NUM_PUNCT_RANGE, 0);
840	if (cv) {
841	    for (i=0 ; i<NUM_PUNCT_RANGE ; i++) {
842		addrange(cv, punctRangeTable[i].start,
843			punctRangeTable[i].end);
844	    }
845	    for (i=0 ; i<NUM_PUNCT_CHAR ; i++) {
846		addchr(cv, punctCharTable[i]);
847	    }
848	}
849	break;
850    case CC_XDIGIT:
851	/*
852	 * This is a 3 instead of (NUM_DIGIT_RANGE+2) because I've no
853	 * idea how to define the digits 'a' through 'f' in
854	 * non-western locales.  The concept is quite possibly non
855	 * portable, or only used in contextx where the characters
856	 * used would be the western ones anyway!  Whatever is
857	 * actually the case, the number of ranges is fixed (until
858	 * someone comes up with a better arrangement!)
859	 */
860	cv = getcvec(v, 0, 3, 0);
861	if (cv) {
862	    addrange(cv, '0', '9');
863	    addrange(cv, 'a', 'f');
864	    addrange(cv, 'A', 'F');
865	}
866	break;
867    case CC_SPACE:
868	cv = getcvec(v, NUM_SPACE_CHAR, NUM_SPACE_RANGE, 0);
869	if (cv) {
870	    for (i=0 ; i<NUM_SPACE_RANGE ; i++) {
871		addrange(cv, spaceRangeTable[i].start,
872			spaceRangeTable[i].end);
873	    }
874	    for (i=0 ; i<NUM_SPACE_CHAR ; i++) {
875		addchr(cv, spaceCharTable[i]);
876	    }
877	}
878	break;
879    case CC_LOWER:
880	cv  = getcvec(v, NUM_LOWER_CHAR, NUM_LOWER_RANGE, 0);
881	if (cv) {
882	    for (i=0 ; i<NUM_LOWER_RANGE ; i++) {
883		addrange(cv, lowerRangeTable[i].start,
884			lowerRangeTable[i].end);
885	    }
886	    for (i=0 ; i<NUM_LOWER_CHAR ; i++) {
887		addchr(cv, lowerCharTable[i]);
888	    }
889	}
890	break;
891    case CC_UPPER:
892	cv  = getcvec(v, NUM_UPPER_CHAR, NUM_UPPER_RANGE, 0);
893	if (cv) {
894	    for (i=0 ; i<NUM_UPPER_RANGE ; i++) {
895		addrange(cv, upperRangeTable[i].start,
896			upperRangeTable[i].end);
897	    }
898	    for (i=0 ; i<NUM_UPPER_CHAR ; i++) {
899		addchr(cv, upperCharTable[i]);
900	    }
901	}
902	break;
903    case CC_GRAPH:
904	cv  = getcvec(v, NUM_GRAPH_CHAR, NUM_GRAPH_RANGE, 0);
905	if (cv) {
906	    for (i=0 ; i<NUM_GRAPH_RANGE ; i++) {
907		addrange(cv, graphRangeTable[i].start,
908			graphRangeTable[i].end);
909	    }
910	    for (i=0 ; i<NUM_GRAPH_CHAR ; i++) {
911		addchr(cv, graphCharTable[i]);
912	    }
913	}
914	break;
915    }
916    if (cv == NULL) {
917	ERR(REG_ESPACE);
918    }
919    return cv;
920}
921
922/*
923 - allcases - supply cvec for all case counterparts of a chr (including itself)
924 * This is a shortcut, preferably an efficient one, for simple characters;
925 * messy cases are done via range().
926 ^ static struct cvec *allcases(struct vars *, pchr);
927 */
928static struct cvec *
929allcases(v, pc)
930    struct vars *v;			/* context */
931    pchr pc;				/* character to get case equivs of */
932{
933    struct cvec *cv;
934    chr c = (chr)pc;
935    chr lc, uc, tc;
936
937    lc = Tcl_UniCharToLower((chr)c);
938    uc = Tcl_UniCharToUpper((chr)c);
939    tc = Tcl_UniCharToTitle((chr)c);
940
941    if (tc != uc) {
942	cv = getcvec(v, 3, 0, 0);
943	addchr(cv, tc);
944    } else {
945	cv = getcvec(v, 2, 0, 0);
946    }
947    addchr(cv, lc);
948    if (lc != uc) {
949	addchr(cv, uc);
950    }
951    return cv;
952}
953
954/*
955 - cmp - chr-substring compare
956 * Backrefs need this.  It should preferably be efficient.
957 * Note that it does not need to report anything except equal/unequal.
958 * Note also that the length is exact, and the comparison should not
959 * stop at embedded NULs!
960 ^ static int cmp(CONST chr *, CONST chr *, size_t);
961 */
962static int				/* 0 for equal, nonzero for unequal */
963cmp(x, y, len)
964    CONST chr *x, *y;			/* strings to compare */
965    size_t len;				/* exact length of comparison */
966{
967    return memcmp(VS(x), VS(y), len*sizeof(chr));
968}
969
970/*
971 - casecmp - case-independent chr-substring compare
972 * REG_ICASE backrefs need this.  It should preferably be efficient.
973 * Note that it does not need to report anything except equal/unequal.
974 * Note also that the length is exact, and the comparison should not
975 * stop at embedded NULs!
976 ^ static int casecmp(CONST chr *, CONST chr *, size_t);
977 */
978static int				/* 0 for equal, nonzero for unequal */
979casecmp(x, y, len)
980    CONST chr *x, *y;			/* strings to compare */
981    size_t len;				/* exact length of comparison */
982{
983    for (; len > 0; len--, x++, y++) {
984	if ((*x!=*y) && (Tcl_UniCharToLower(*x) != Tcl_UniCharToLower(*y))) {
985	    return 1;
986	}
987    }
988    return 0;
989}
990