1/*
2The contents of this file are subject to the Mozilla Public License
3Version 1.1 (the "License"); you may not use this file except in
4compliance with the License. You may obtain a copy of the License at
5http://www.mozilla.org/MPL/
6
7Software distributed under the License is distributed on an "AS IS"
8basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9License for the specific language governing rights and limitations
10under the License.
11
12The Original Code is expat.
13
14The Initial Developer of the Original Code is James Clark.
15Portions created by James Clark are Copyright (C) 1998, 1999
16James Clark. All Rights Reserved.
17
18Contributor(s):
19
20Alternatively, the contents of this file may be used under the terms
21of the GNU General Public License (the "GPL"), in which case the
22provisions of the GPL are applicable instead of those above.  If you
23wish to allow use of your version of this file only under the terms of
24the GPL and not to allow others to use your version of this file under
25the MPL, indicate your decision by deleting the provisions above and
26replace them with the notice and other provisions required by the
27GPL. If you do not delete the provisions above, a recipient may use
28your version of this file under either the MPL or the GPL.
29*/
30
31#include <string.h>
32#include <stdio.h>
33#include <stddef.h>
34
35struct range {
36  int start;
37  int end;
38};
39
40struct range nmstrt[] = {
41  { '_' },
42  { ':' },
43  /* BaseChar */
44  { 0x0041, 0x005a },
45  { 0x0061, 0x007a },
46  { 0x00c0, 0x00d6 },
47  { 0x00d8, 0x00f6 },
48  { 0x00f8, 0x00ff },
49  { 0x0100, 0x0131 },
50  { 0x0134, 0x013e },
51  { 0x0141, 0x0148 },
52  { 0x014a, 0x017e },
53  { 0x0180, 0x01c3 },
54  { 0x01cd, 0x01f0 },
55  { 0x01f4, 0x01f5 },
56  { 0x01fa, 0x0217 },
57  { 0x0250, 0x02a8 },
58  { 0x02bb, 0x02c1 },
59  { 0x0386 },
60  { 0x0388, 0x038a },
61  { 0x038c },
62  { 0x038e, 0x03a1 },
63  { 0x03a3, 0x03ce },
64  { 0x03d0, 0x03d6 },
65  { 0x03da },
66  { 0x03dc },
67  { 0x03de },
68  { 0x03e0 },
69  { 0x03e2, 0x03f3 },
70  { 0x0401, 0x040c },
71  { 0x040e, 0x044f },
72  { 0x0451, 0x045c },
73  { 0x045e, 0x0481 },
74  { 0x0490, 0x04c4 },
75  { 0x04c7, 0x04c8 },
76  { 0x04cb, 0x04cc },
77  { 0x04d0, 0x04eb },
78  { 0x04ee, 0x04f5 },
79  { 0x04f8, 0x04f9 },
80  { 0x0531, 0x0556 },
81  { 0x0559 },
82  { 0x0561, 0x0586 },
83  { 0x05d0, 0x05ea },
84  { 0x05f0, 0x05f2 },
85  { 0x0621, 0x063a },
86  { 0x0641, 0x064a },
87  { 0x0671, 0x06b7 },
88  { 0x06ba, 0x06be },
89  { 0x06c0, 0x06ce },
90  { 0x06d0, 0x06d3 },
91  { 0x06d5 },
92  { 0x06e5, 0x06e6 },
93  { 0x0905, 0x0939 },
94  { 0x093d },
95  { 0x0958, 0x0961 },
96  { 0x0985, 0x098c },
97  { 0x098f, 0x0990 },
98  { 0x0993, 0x09a8 },
99  { 0x09aa, 0x09b0 },
100  { 0x09b2 },
101  { 0x09b6, 0x09b9 },
102  { 0x09dc, 0x09dd },
103  { 0x09df, 0x09e1 },
104  { 0x09f0, 0x09f1 },
105  { 0x0a05, 0x0a0a },
106  { 0x0a0f, 0x0a10 },
107  { 0x0a13, 0x0a28 },
108  { 0x0a2a, 0x0a30 },
109  { 0x0a32, 0x0a33 },
110  { 0x0a35, 0x0a36 },
111  { 0x0a38, 0x0a39 },
112  { 0x0a59, 0x0a5c },
113  { 0x0a5e },
114  { 0x0a72, 0x0a74 },
115  { 0x0a85, 0x0a8b },
116  { 0x0a8d },
117  { 0x0a8f, 0x0a91 },
118  { 0x0a93, 0x0aa8 },
119  { 0x0aaa, 0x0ab0 },
120  { 0x0ab2, 0x0ab3 },
121  { 0x0ab5, 0x0ab9 },
122  { 0x0abd },
123  { 0x0ae0 },
124  { 0x0b05, 0x0b0c },
125  { 0x0b0f, 0x0b10 },
126  { 0x0b13, 0x0b28 },
127  { 0x0b2a, 0x0b30 },
128  { 0x0b32, 0x0b33 },
129  { 0x0b36, 0x0b39 },
130  { 0x0b3d },
131  { 0x0b5c, 0x0b5d },
132  { 0x0b5f, 0x0b61 },
133  { 0x0b85, 0x0b8a },
134  { 0x0b8e, 0x0b90 },
135  { 0x0b92, 0x0b95 },
136  { 0x0b99, 0x0b9a },
137  { 0x0b9c },
138  { 0x0b9e, 0x0b9f },
139  { 0x0ba3, 0x0ba4 },
140  { 0x0ba8, 0x0baa },
141  { 0x0bae, 0x0bb5 },
142  { 0x0bb7, 0x0bb9 },
143  { 0x0c05, 0x0c0c },
144  { 0x0c0e, 0x0c10 },
145  { 0x0c12, 0x0c28 },
146  { 0x0c2a, 0x0c33 },
147  { 0x0c35, 0x0c39 },
148  { 0x0c60, 0x0c61 },
149  { 0x0c85, 0x0c8c },
150  { 0x0c8e, 0x0c90 },
151  { 0x0c92, 0x0ca8 },
152  { 0x0caa, 0x0cb3 },
153  { 0x0cb5, 0x0cb9 },
154  { 0x0cde },
155  { 0x0ce0, 0x0ce1 },
156  { 0x0d05, 0x0d0c },
157  { 0x0d0e, 0x0d10 },
158  { 0x0d12, 0x0d28 },
159  { 0x0d2a, 0x0d39 },
160  { 0x0d60, 0x0d61 },
161  { 0x0e01, 0x0e2e },
162  { 0x0e30 },
163  { 0x0e32, 0x0e33 },
164  { 0x0e40, 0x0e45 },
165  { 0x0e81, 0x0e82 },
166  { 0x0e84 },
167  { 0x0e87, 0x0e88 },
168  { 0x0e8a },
169  { 0x0e8d },
170  { 0x0e94, 0x0e97 },
171  { 0x0e99, 0x0e9f },
172  { 0x0ea1, 0x0ea3 },
173  { 0x0ea5 },
174  { 0x0ea7 },
175  { 0x0eaa, 0x0eab },
176  { 0x0ead, 0x0eae },
177  { 0x0eb0 },
178  { 0x0eb2, 0x0eb3 },
179  { 0x0ebd },
180  { 0x0ec0, 0x0ec4 },
181  { 0x0f40, 0x0f47 },
182  { 0x0f49, 0x0f69 },
183  { 0x10a0, 0x10c5 },
184  { 0x10d0, 0x10f6 },
185  { 0x1100 },
186  { 0x1102, 0x1103 },
187  { 0x1105, 0x1107 },
188  { 0x1109 },
189  { 0x110b, 0x110c },
190  { 0x110e, 0x1112 },
191  { 0x113c },
192  { 0x113e },
193  { 0x1140 },
194  { 0x114c },
195  { 0x114e },
196  { 0x1150 },
197  { 0x1154, 0x1155 },
198  { 0x1159 },
199  { 0x115f, 0x1161 },
200  { 0x1163 },
201  { 0x1165 },
202  { 0x1167 },
203  { 0x1169 },
204  { 0x116d, 0x116e },
205  { 0x1172, 0x1173 },
206  { 0x1175 },
207  { 0x119e },
208  { 0x11a8 },
209  { 0x11ab },
210  { 0x11ae, 0x11af },
211  { 0x11b7, 0x11b8 },
212  { 0x11ba },
213  { 0x11bc, 0x11c2 },
214  { 0x11eb },
215  { 0x11f0 },
216  { 0x11f9 },
217  { 0x1e00, 0x1e9b },
218  { 0x1ea0, 0x1ef9 },
219  { 0x1f00, 0x1f15 },
220  { 0x1f18, 0x1f1d },
221  { 0x1f20, 0x1f45 },
222  { 0x1f48, 0x1f4d },
223  { 0x1f50, 0x1f57 },
224  { 0x1f59 },
225  { 0x1f5b },
226  { 0x1f5d },
227  { 0x1f5f, 0x1f7d },
228  { 0x1f80, 0x1fb4 },
229  { 0x1fb6, 0x1fbc },
230  { 0x1fbe },
231  { 0x1fc2, 0x1fc4 },
232  { 0x1fc6, 0x1fcc },
233  { 0x1fd0, 0x1fd3 },
234  { 0x1fd6, 0x1fdb },
235  { 0x1fe0, 0x1fec },
236  { 0x1ff2, 0x1ff4 },
237  { 0x1ff6, 0x1ffc },
238  { 0x2126 },
239  { 0x212a, 0x212b },
240  { 0x212e },
241  { 0x2180, 0x2182 },
242  { 0x3041, 0x3094 },
243  { 0x30a1, 0x30fa },
244  { 0x3105, 0x312c },
245  { 0xac00, 0xd7a3 },
246  /* Ideographic */
247  { 0x4e00, 0x9fa5 },
248  { 0x3007 },
249  { 0x3021, 0x3029 },
250};
251
252/* name chars that are not name start chars */
253struct range name[] = {
254  { '.' },
255  { '-' },
256  /* CombiningChar */
257  { 0x0300, 0x0345 },
258  { 0x0360, 0x0361 },
259  { 0x0483, 0x0486 },
260  { 0x0591, 0x05a1 },
261  { 0x05a3, 0x05b9 },
262  { 0x05bb, 0x05bd },
263  { 0x05bf },
264  { 0x05c1, 0x05c2 },
265  { 0x05c4 },
266  { 0x064b, 0x0652 },
267  { 0x0670 },
268  { 0x06d6, 0x06dc },
269  { 0x06dd, 0x06df },
270  { 0x06e0, 0x06e4 },
271  { 0x06e7, 0x06e8 },
272  { 0x06ea, 0x06ed },
273  { 0x0901, 0x0903 },
274  { 0x093c },
275  { 0x093e, 0x094c },
276  { 0x094d },
277  { 0x0951, 0x0954 },
278  { 0x0962, 0x0963 },
279  { 0x0981, 0x0983 },
280  { 0x09bc },
281  { 0x09be },
282  { 0x09bf },
283  { 0x09c0, 0x09c4 },
284  { 0x09c7, 0x09c8 },
285  { 0x09cb, 0x09cd },
286  { 0x09d7 },
287  { 0x09e2, 0x09e3 },
288  { 0x0a02 },
289  { 0x0a3c },
290  { 0x0a3e },
291  { 0x0a3f },
292  { 0x0a40, 0x0a42 },
293  { 0x0a47, 0x0a48 },
294  { 0x0a4b, 0x0a4d },
295  { 0x0a70, 0x0a71 },
296  { 0x0a81, 0x0a83 },
297  { 0x0abc },
298  { 0x0abe, 0x0ac5 },
299  { 0x0ac7, 0x0ac9 },
300  { 0x0acb, 0x0acd },
301  { 0x0b01, 0x0b03 },
302  { 0x0b3c },
303  { 0x0b3e, 0x0b43 },
304  { 0x0b47, 0x0b48 },
305  { 0x0b4b, 0x0b4d },
306  { 0x0b56, 0x0b57 },
307  { 0x0b82, 0x0b83 },
308  { 0x0bbe, 0x0bc2 },
309  { 0x0bc6, 0x0bc8 },
310  { 0x0bca, 0x0bcd },
311  { 0x0bd7 },
312  { 0x0c01, 0x0c03 },
313  { 0x0c3e, 0x0c44 },
314  { 0x0c46, 0x0c48 },
315  { 0x0c4a, 0x0c4d },
316  { 0x0c55, 0x0c56 },
317  { 0x0c82, 0x0c83 },
318  { 0x0cbe, 0x0cc4 },
319  { 0x0cc6, 0x0cc8 },
320  { 0x0cca, 0x0ccd },
321  { 0x0cd5, 0x0cd6 },
322  { 0x0d02, 0x0d03 },
323  { 0x0d3e, 0x0d43 },
324  { 0x0d46, 0x0d48 },
325  { 0x0d4a, 0x0d4d },
326  { 0x0d57 },
327  { 0x0e31 },
328  { 0x0e34, 0x0e3a },
329  { 0x0e47, 0x0e4e },
330  { 0x0eb1 },
331  { 0x0eb4, 0x0eb9 },
332  { 0x0ebb, 0x0ebc },
333  { 0x0ec8, 0x0ecd },
334  { 0x0f18, 0x0f19 },
335  { 0x0f35 },
336  { 0x0f37 },
337  { 0x0f39 },
338  { 0x0f3e },
339  { 0x0f3f },
340  { 0x0f71, 0x0f84 },
341  { 0x0f86, 0x0f8b },
342  { 0x0f90, 0x0f95 },
343  { 0x0f97 },
344  { 0x0f99, 0x0fad },
345  { 0x0fb1, 0x0fb7 },
346  { 0x0fb9 },
347  { 0x20d0, 0x20dc },
348  { 0x20e1 },
349  { 0x302a, 0x302f },
350  { 0x3099 },
351  { 0x309a },
352  /* Digit */
353  { 0x0030, 0x0039 },
354  { 0x0660, 0x0669 },
355  { 0x06f0, 0x06f9 },
356  { 0x0966, 0x096f },
357  { 0x09e6, 0x09ef },
358  { 0x0a66, 0x0a6f },
359  { 0x0ae6, 0x0aef },
360  { 0x0b66, 0x0b6f },
361  { 0x0be7, 0x0bef },
362  { 0x0c66, 0x0c6f },
363  { 0x0ce6, 0x0cef },
364  { 0x0d66, 0x0d6f },
365  { 0x0e50, 0x0e59 },
366  { 0x0ed0, 0x0ed9 },
367  { 0x0f20, 0x0f29 },
368  /* Extender */
369  { 0xb7 },
370  { 0x02d0 },
371  { 0x02d1 },
372  { 0x0387 },
373  { 0x0640 },
374  { 0x0e46 },
375  { 0x0ec6 },
376  { 0x3005 },
377  { 0x3031, 0x3035 },
378  { 0x309d, 0x309e },
379  { 0x30fc, 0x30fe },
380};
381
382void setTab(char *tab, struct range *ranges, size_t nRanges)
383{
384  size_t i;
385  int j;
386  for (i = 0; i < nRanges; i++) {
387    if (ranges[i].end) {
388      for (j = ranges[i].start; j <= ranges[i].end; j++)
389	tab[j] = 1;
390    }
391    else
392      tab[ranges[i].start] = 1;
393  }
394}
395
396void printTabs(char *tab)
397{
398  int nBitmaps = 2;
399  int i, j, k;
400  unsigned char pageIndex[512];
401
402  printf(
403"static const unsigned namingBitmap[] = {\n\
4040x00000000, 0x00000000, 0x00000000, 0x00000000,\n\
4050x00000000, 0x00000000, 0x00000000, 0x00000000,\n\
4060xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n\
4070xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,\n");
408  for (i = 0; i < 512; i++) {
409    int kind = tab[i*256];
410    for (j = 1; j < 256; j++)
411      if (tab[i*256 +j] != kind) {
412	kind = -1;
413	break;
414      }
415    if (i >= 256 &&  memcmp(tab + (i - 256)*256, tab + i*256, 256) == 0)
416      pageIndex[i] = pageIndex[i - 256];
417    else if (kind == -1) {
418      pageIndex[i] = nBitmaps++;
419      for (j = 0; j < 8; j++) {
420	unsigned val = 0;
421	for (k = 0; k < 32; k++) {
422	  if (tab[i*256 + j*32 +k])
423	    val |= (1 << k);
424	}
425	printf("0x%08X,", val);
426	putchar((((j + 1) & 3) == 0) ? '\n' : ' ');
427      }
428    }
429    else
430      pageIndex[i] = kind;
431  }
432  printf("};\n");
433  printf("static const unsigned char nmstrtPages[] = {\n");
434  for (i = 0; i < 512; i++) {
435    if (i == 256)
436      printf("};\nstatic const unsigned char namePages[] = {\n");
437    printf("0x%02X,", pageIndex[i]);
438    putchar((((i + 1) & 7) == 0) ? '\n' : ' ');
439  }
440  printf("};\n");
441}
442
443int main()
444{
445  char tab[2*65536];
446  memset(tab, 0, 65536);
447  setTab(tab, nmstrt, sizeof(nmstrt)/sizeof(nmstrt[0]));
448  memcpy(tab + 65536, tab, 65536);
449  setTab(tab + 65536, name, sizeof(name)/sizeof(name[0]));
450  printTabs(tab);
451  return 0;
452}
453