1/**********************************************************************
2  mktable.c
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2007  K.Kosako  <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <stdlib.h>
31#include <stdio.h>
32#include <locale.h>
33
34#define __USE_ISOC99
35#include <ctype.h>
36
37#include "regenc.h"
38
/*
 * Identifiers of the encodings this generator knows about.  The values are
 * stored in ENC_INFO.num and are what the Is*() classifiers switch on.
 */
enum {
  ASCII              =  0,
  UNICODE_ISO_8859_1 =  1,
  ISO_8859_1         =  2,
  ISO_8859_2         =  3,
  ISO_8859_3         =  4,
  ISO_8859_4         =  5,
  ISO_8859_5         =  6,
  ISO_8859_6         =  7,
  ISO_8859_7         =  8,
  ISO_8859_8         =  9,
  ISO_8859_9         = 10,
  ISO_8859_10        = 11,
  ISO_8859_11        = 12,
  ISO_8859_13        = 13,
  ISO_8859_14        = 14,
  ISO_8859_15        = 15,
  ISO_8859_16        = 16,
  KOI8               = 17,
  KOI8_R             = 18
};
58
/* One entry of the list of encodings to generate a table for. */
typedef struct {
  int         num;   /* encoding id, passed to the Is*() classifiers */
  const char *name;  /* text inserted into the emitted array name    */
} ENC_INFO;
63
64static ENC_INFO Info[] = {
65  { ASCII,               "ASCII" },
66  { UNICODE_ISO_8859_1,  "UNICODE_ISO_8859_1"  },
67  { ISO_8859_1,  "ISO_8859_1"  },
68  { ISO_8859_2,  "ISO_8859_2"  },
69  { ISO_8859_3,  "ISO_8859_3"  },
70  { ISO_8859_4,  "ISO_8859_4"  },
71  { ISO_8859_5,  "ISO_8859_5"  },
72  { ISO_8859_6,  "ISO_8859_6"  },
73  { ISO_8859_7,  "ISO_8859_7"  },
74  { ISO_8859_8,  "ISO_8859_8"  },
75  { ISO_8859_9,  "ISO_8859_9"  },
76  { ISO_8859_10, "ISO_8859_10" },
77  { ISO_8859_11, "ISO_8859_11" },
78  { ISO_8859_13, "ISO_8859_13" },
79  { ISO_8859_14, "ISO_8859_14" },
80  { ISO_8859_15, "ISO_8859_15" },
81  { ISO_8859_16, "ISO_8859_16" },
82  { KOI8,        "KOI8" },
83  { KOI8_R,      "KOI8_R" }
84};
85
86
87static int IsAlpha(int enc, int c)
88{
89  if (enc == ASCII)
90    return isalpha(c);
91
92  if (c >= 0x41 && c <= 0x5a) return 1;
93  if (c >= 0x61 && c <= 0x7a) return 1;
94
95  switch (enc) {
96  case UNICODE_ISO_8859_1:
97  case ISO_8859_1:
98  case ISO_8859_9:
99    if (c == 0xaa) return 1;
100    if (c == 0xb5) return 1;
101    if (c == 0xba) return 1;
102    if (c >= 0xc0 && c <= 0xd6) return 1;
103    if (c >= 0xd8 && c <= 0xf6) return 1;
104    if (c >= 0xf8 && c <= 0xff) return 1;
105    break;
106
107  case ISO_8859_2:
108    if (c == 0xa1 || c == 0xa3) return 1;
109    if (c == 0xa5 || c == 0xa6) return 1;
110    if (c >= 0xa9 && c <= 0xac) return 1;
111    if (c >= 0xae && c <= 0xaf) return 1;
112    if (c == 0xb1 || c == 0xb3) return 1;
113    if (c == 0xb5 || c == 0xb6) return 1;
114    if (c >= 0xb9 && c <= 0xbc) return 1;
115    if (c >= 0xbe && c <= 0xbf) return 1;
116    if (c >= 0xc0 && c <= 0xd6) return 1;
117    if (c >= 0xd8 && c <= 0xf6) return 1;
118    if (c >= 0xf8 && c <= 0xfe) return 1;
119    break;
120
121  case ISO_8859_3:
122    if (c == 0xa1) return 1;
123    if (c == 0xa6) return 1;
124    if (c >= 0xa9 && c <= 0xac) return 1;
125    if (c == 0xaf) return 1;
126    if (c == 0xb1) return 1;
127    if (c == 0xb5 || c == 0xb6) return 1;
128    if (c >= 0xb9 && c <= 0xbc) return 1;
129    if (c == 0xbf) return 1;
130    if (c >= 0xc0 && c <= 0xc2) return 1;
131    if (c >= 0xc4 && c <= 0xcf) return 1;
132    if (c >= 0xd1 && c <= 0xd6) return 1;
133    if (c >= 0xd8 && c <= 0xe2) return 1;
134    if (c >= 0xe4 && c <= 0xef) return 1;
135    if (c >= 0xf1 && c <= 0xf6) return 1;
136    if (c >= 0xf8 && c <= 0xfe) return 1;
137    break;
138
139  case ISO_8859_4:
140    if (c >= 0xa1 && c <= 0xa3) return 1;
141    if (c == 0xa5 || c == 0xa6) return 1;
142    if (c >= 0xa9 && c <= 0xac) return 1;
143    if (c == 0xae) return 1;
144    if (c == 0xb1 || c == 0xb3) return 1;
145    if (c == 0xb5 || c == 0xb6) return 1;
146    if (c >= 0xb9 && c <= 0xbf) return 1;
147    if (c >= 0xc0 && c <= 0xd6) return 1;
148    if (c >= 0xd8 && c <= 0xf6) return 1;
149    if (c >= 0xf8 && c <= 0xfe) return 1;
150    break;
151
152  case ISO_8859_5:
153    if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
154    if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
155    break;
156
157  case ISO_8859_6:
158    if (c >= 0xc1 && c <= 0xda) return 1;
159    if (c >= 0xe0 && c <= 0xf2) return 1;
160    break;
161
162  case ISO_8859_7:
163    if (c == 0xb6) return 1;
164    if (c >= 0xb8 && c <= 0xba) return 1;
165    if (c == 0xbc) return 1;
166    if (c >= 0xbe && c <= 0xbf) return 1;
167    if (c == 0xc0) return 1;
168    if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
169    if (c >= 0xdc && c <= 0xfe) return 1;
170    break;
171
172  case ISO_8859_8:
173    if (c == 0xb5) return 1;
174    if (c >= 0xe0 && c <= 0xfa) return 1;
175    break;
176
177  case ISO_8859_10:
178    if (c >= 0xa1 && c <= 0xa6) return 1;
179    if (c >= 0xa8 && c <= 0xac) return 1;
180    if (c == 0xae || c == 0xaf) return 1;
181    if (c >= 0xb1 && c <= 0xb6) return 1;
182    if (c >= 0xb8 && c <= 0xbc) return 1;
183    if (c >= 0xbe && c <= 0xff) return 1;
184    break;
185
186  case ISO_8859_11:
187    if (c >= 0xa1 && c <= 0xda) return 1;
188    if (c >= 0xdf && c <= 0xfb) return 1;
189    break;
190
191  case ISO_8859_13:
192    if (c == 0xa8) return 1;
193    if (c == 0xaa) return 1;
194    if (c == 0xaf) return 1;
195    if (c == 0xb5) return 1;
196    if (c == 0xb8) return 1;
197    if (c == 0xba) return 1;
198    if (c >= 0xbf && c <= 0xd6) return 1;
199    if (c >= 0xd8 && c <= 0xf6) return 1;
200    if (c >= 0xf8 && c <= 0xfe) return 1;
201    break;
202
203  case ISO_8859_14:
204    if (c == 0xa1 || c == 0xa2) return 1;
205    if (c == 0xa4 || c == 0xa5) return 1;
206    if (c == 0xa6 || c == 0xa8) return 1;
207    if (c >= 0xaa && c <= 0xac) return 1;
208    if (c >= 0xaf && c <= 0xb5) return 1;
209    if (c >= 0xb7 && c <= 0xff) return 1;
210    break;
211
212  case ISO_8859_15:
213    if (c == 0xaa) return 1;
214    if (c == 0xb5) return 1;
215    if (c == 0xba) return 1;
216    if (c >= 0xc0 && c <= 0xd6) return 1;
217    if (c >= 0xd8 && c <= 0xf6) return 1;
218    if (c >= 0xf8 && c <= 0xff) return 1;
219    if (c == 0xa6) return 1;
220    if (c == 0xa8) return 1;
221    if (c == 0xb4) return 1;
222    if (c == 0xb8) return 1;
223    if (c == 0xbc) return 1;
224    if (c == 0xbd) return 1;
225    if (c == 0xbe) return 1;
226    break;
227
228  case ISO_8859_16:
229    if (c == 0xa1) return 1;
230    if (c == 0xa2) return 1;
231    if (c == 0xa3) return 1;
232    if (c == 0xa6) return 1;
233    if (c == 0xa8) return 1;
234    if (c == 0xaa) return 1;
235    if (c == 0xac) return 1;
236    if (c == 0xae) return 1;
237    if (c == 0xaf) return 1;
238    if (c == 0xb2) return 1;
239    if (c == 0xb3) return 1;
240    if (c == 0xb4) return 1;
241    if (c >= 0xb8 && c <= 0xba) return 1;
242    if (c == 0xbc) return 1;
243    if (c == 0xbd) return 1;
244    if (c == 0xbe) return 1;
245    if (c == 0xbf) return 1;
246    if (c >= 0xc0 && c <= 0xde) return 1;
247    if (c >= 0xdf && c <= 0xff) return 1;
248    break;
249
250  case KOI8_R:
251    if (c == 0xa3 || c == 0xb3) return 1;
252    /* fall */
253  case KOI8:
254    if (c >= 0xc0 && c <= 0xff) return 1;
255    break;
256
257  default:
258    exit(-1);
259  }
260
261  return 0;
262}
263
264static int IsBlank(int enc, int c)
265{
266  if (enc == ASCII)
267    return isblank(c);
268
269  if (c == 0x09	|| c == 0x20) return 1;
270
271  switch (enc) {
272  case UNICODE_ISO_8859_1:
273  case ISO_8859_1:
274  case ISO_8859_2:
275  case ISO_8859_3:
276  case ISO_8859_4:
277  case ISO_8859_5:
278  case ISO_8859_6:
279  case ISO_8859_7:
280  case ISO_8859_8:
281  case ISO_8859_9:
282  case ISO_8859_10:
283  case ISO_8859_11:
284  case ISO_8859_13:
285  case ISO_8859_14:
286  case ISO_8859_15:
287  case ISO_8859_16:
288  case KOI8:
289    if (c == 0xa0) return 1;
290    break;
291
292  case KOI8_R:
293    if (c == 0x9a) return 1;
294    break;
295
296  default:
297    exit(-1);
298  }
299
300  return 0;
301}
302
303static int IsCntrl(int enc, int c)
304{
305  if (enc == ASCII)
306    return iscntrl(c);
307
308  if (c >= 0x00	&& c <= 0x1F) return 1;
309
310  switch (enc) {
311  case UNICODE_ISO_8859_1:
312    if (c == 0xad) return 1;
313    /* fall */
314  case ISO_8859_1:
315  case ISO_8859_2:
316  case ISO_8859_3:
317  case ISO_8859_4:
318  case ISO_8859_5:
319  case ISO_8859_6:
320  case ISO_8859_7:
321  case ISO_8859_8:
322  case ISO_8859_9:
323  case ISO_8859_10:
324  case ISO_8859_11:
325  case ISO_8859_13:
326  case ISO_8859_14:
327  case ISO_8859_15:
328  case ISO_8859_16:
329  case KOI8:
330    if (c >= 0x7f && c <= 0x9F) return 1;
331    break;
332
333
334  case KOI8_R:
335    if (c == 0x7f) return 1;
336    break;
337
338  default:
339    exit(-1);
340  }
341
342  return 0;
343}
344
345static int IsDigit(int enc ARG_UNUSED, int c)
346{
347  if (c >= 0x30 && c <= 0x39) return 1;
348  return 0;
349}
350
351static int IsGraph(int enc, int c)
352{
353  if (enc == ASCII)
354    return isgraph(c);
355
356  if (c >= 0x21 && c <= 0x7e) return 1;
357
358  switch (enc) {
359  case UNICODE_ISO_8859_1:
360  case ISO_8859_1:
361  case ISO_8859_2:
362  case ISO_8859_4:
363  case ISO_8859_5:
364  case ISO_8859_9:
365  case ISO_8859_10:
366  case ISO_8859_13:
367  case ISO_8859_14:
368  case ISO_8859_15:
369  case ISO_8859_16:
370    if (c >= 0xa1 && c <= 0xff) return 1;
371    break;
372
373  case ISO_8859_3:
374    if (c >= 0xa1) {
375      if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
376	  c == 0xe3 || c == 0xf0)
377	return 0;
378      else
379	return 1;
380    }
381    break;
382
383  case ISO_8859_6:
384    if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
385      return 1;
386    if (c >= 0xc1 && c <= 0xda) return 1;
387    if (c >= 0xe0 && c <= 0xf2) return 1;
388    break;
389
390  case ISO_8859_7:
391    if (c >= 0xa1 && c <= 0xfe &&
392	c != 0xa4 && c != 0xa5 && c != 0xaa &&
393	c != 0xae && c != 0xd2) return 1;
394    break;
395
396  case ISO_8859_8:
397    if (c >= 0xa2 && c <= 0xfa) {
398      if (c >= 0xbf && c <= 0xde) return 0;
399      return 1;
400    }
401    break;
402
403  case ISO_8859_11:
404    if (c >= 0xa1 && c <= 0xda) return 1;
405    if (c >= 0xdf && c <= 0xfb) return 1;
406    break;
407
408  case KOI8:
409    if (c >= 0xc0 && c <= 0xff) return 1;
410    break;
411
412  case KOI8_R:
413    if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1;
414    break;
415
416  default:
417    exit(-1);
418  }
419
420  return 0;
421}
422
423static int IsLower(int enc, int c)
424{
425  if (enc == ASCII)
426    return islower(c);
427
428  if (c >= 0x61 && c <= 0x7a) return 1;
429
430  switch (enc) {
431  case UNICODE_ISO_8859_1:
432  case ISO_8859_1:
433  case ISO_8859_9:
434    if (c == 0xaa) return 1;
435    if (c == 0xb5) return 1;
436    if (c == 0xba) return 1;
437    if (c >= 0xdf && c <= 0xf6) return 1;
438    if (c >= 0xf8 && c <= 0xff) return 1;
439    break;
440
441  case ISO_8859_2:
442    if (c == 0xb1 || c == 0xb3) return 1;
443    if (c == 0xb5 || c == 0xb6) return 1;
444    if (c >= 0xb9 && c <= 0xbc) return 1;
445    if (c >= 0xbe && c <= 0xbf) return 1;
446    if (c >= 0xdf && c <= 0xf6) return 1;
447    if (c >= 0xf8 && c <= 0xfe) return 1;
448    break;
449
450  case ISO_8859_3:
451    if (c == 0xb1) return 1;
452    if (c == 0xb5 || c == 0xb6) return 1;
453    if (c >= 0xb9 && c <= 0xbc) return 1;
454    if (c == 0xbf) return 1;
455    if (c == 0xdf) return 1;
456    if (c >= 0xe0 && c <= 0xe2) return 1;
457    if (c >= 0xe4 && c <= 0xef) return 1;
458    if (c >= 0xf1 && c <= 0xf6) return 1;
459    if (c >= 0xf8 && c <= 0xfe) return 1;
460    break;
461
462  case ISO_8859_4:
463    if (c == 0xa2) return 1;
464    if (c == 0xb1 || c == 0xb3) return 1;
465    if (c == 0xb5 || c == 0xb6) return 1;
466    if (c >= 0xb9 && c <= 0xbc) return 1;
467    if (c >= 0xbe && c <= 0xbf) return 1;
468    if (c == 0xdf) return 1;
469    if (c >= 0xe0 && c <= 0xf6) return 1;
470    if (c >= 0xf8 && c <= 0xfe) return 1;
471    break;
472
473  case ISO_8859_5:
474    if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
475    break;
476
477  case ISO_8859_6:
478    break;
479
480  case ISO_8859_7:
481    if (c == 0xc0) return 1;
482    if (c >= 0xdc && c <= 0xfe) return 1;
483    break;
484
485  case ISO_8859_8:
486    if (c == 0xb5) return 1;
487    break;
488
489  case ISO_8859_10:
490    if (c >= 0xb1 && c <= 0xb6) return 1;
491    if (c >= 0xb8 && c <= 0xbc) return 1;
492    if (c == 0xbe || c == 0xbf) return 1;
493    if (c >= 0xdf && c <= 0xff) return 1;
494    break;
495
496  case ISO_8859_11:
497    break;
498
499  case ISO_8859_13:
500    if (c == 0xb5) return 1;
501    if (c == 0xb8) return 1;
502    if (c == 0xba) return 1;
503    if (c == 0xbf) return 1;
504    if (c >= 0xdf && c <= 0xf6) return 1;
505    if (c >= 0xf8 && c <= 0xfe) return 1;
506    break;
507
508  case ISO_8859_14:
509    if (c == 0xa2) return 1;
510    if (c == 0xa5) return 1;
511    if (c == 0xab) return 1;
512    if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1;
513    if (c >= 0xb8 && c <= 0xba) return 1;
514    if (c == 0xbc) return 1;
515    if (c == 0xbe || c == 0xbf) return 1;
516    if (c >= 0xdf && c <= 0xff) return 1;
517    break;
518
519  case ISO_8859_15:
520    if (c == 0xaa) return 1;
521    if (c == 0xb5) return 1;
522    if (c == 0xba) return 1;
523    if (c >= 0xdf && c <= 0xf6) return 1;
524    if (c >= 0xf8 && c <= 0xff) return 1;
525    if (c == 0xa8) return 1;
526    if (c == 0xb8) return 1;
527    if (c == 0xbd) return 1;
528    break;
529
530  case ISO_8859_16:
531    if (c == 0xa2) return 1;
532    if (c == 0xa8) return 1;
533    if (c == 0xae) return 1;
534    if (c == 0xb3) return 1;
535    if (c >= 0xb8 && c <= 0xba) return 1;
536    if (c == 0xbd) return 1;
537    if (c == 0xbf) return 1;
538    if (c >= 0xdf && c <= 0xff) return 1;
539    break;
540
541  case KOI8_R:
542    if (c == 0xa3) return 1;
543    /* fall */
544  case KOI8:
545    if (c >= 0xc0 && c <= 0xdf) return 1;
546    break;
547
548  default:
549    exit(-1);
550  }
551
552  return 0;
553}
554
555static int IsPrint(int enc, int c)
556{
557  if (enc == ASCII)
558    return isprint(c);
559
560  if (c >= 0x20 && c <= 0x7e) return 1;
561
562  switch (enc) {
563  case UNICODE_ISO_8859_1:
564    /* if (c >= 0x09 && c <= 0x0d) return 1; */
565    if (c == 0x85) return 1;
566    /* fall */
567  case ISO_8859_1:
568  case ISO_8859_2:
569  case ISO_8859_4:
570  case ISO_8859_5:
571  case ISO_8859_9:
572  case ISO_8859_10:
573  case ISO_8859_13:
574  case ISO_8859_14:
575  case ISO_8859_15:
576  case ISO_8859_16:
577    if (c >= 0xa0 && c <= 0xff) return 1;
578    break;
579
580  case ISO_8859_3:
581    if (c >= 0xa0) {
582      if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 ||
583	  c == 0xe3 || c == 0xf0)
584	return 0;
585      else
586	return 1;
587    }
588    break;
589
590  case ISO_8859_6:
591    if (c == 0xa0) return 1;
592    if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf)
593      return 1;
594    if (c >= 0xc1 && c <= 0xda) return 1;
595    if (c >= 0xe0 && c <= 0xf2) return 1;
596    break;
597
598  case ISO_8859_7:
599    if (c >= 0xa0 && c <= 0xfe &&
600	c != 0xa4 && c != 0xa5 && c != 0xaa &&
601	c != 0xae && c != 0xd2) return 1;
602    break;
603
604  case ISO_8859_8:
605    if (c >= 0xa0 && c <= 0xfa) {
606      if (c >= 0xbf && c <= 0xde) return 0;
607      if (c == 0xa1) return 0;
608      return 1;
609    }
610    break;
611
612  case ISO_8859_11:
613    if (c >= 0xa0 && c <= 0xda) return 1;
614    if (c >= 0xdf && c <= 0xfb) return 1;
615    break;
616
617  case KOI8:
618    if (c == 0xa0) return 1;
619    if (c >= 0xc0 && c <= 0xff) return 1;
620    break;
621
622  case KOI8_R:
623    if (c >= 0x80 && c <= 0xff) return 1;
624    break;
625
626  default:
627    exit(-1);
628  }
629
630  return 0;
631}
632
633static int IsPunct(int enc, int c)
634{
635  if (enc == ASCII)
636    return ispunct(c);
637
638  if (enc == UNICODE_ISO_8859_1) {
639    if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 ||
640        c == 0x7c || c == 0x7e) return 1;
641    if (c >= 0x3c && c <= 0x3e) return 1;
642  }
643
644  if (c >= 0x21 && c <= 0x2f) return 1;
645  if (c >= 0x3a && c <= 0x40) return 1;
646  if (c >= 0x5b && c <= 0x60) return 1;
647  if (c >= 0x7b && c <= 0x7e) return 1;
648
649  switch (enc) {
650  case ISO_8859_1:
651  case ISO_8859_9:
652  case ISO_8859_15:
653    if (c == 0xad) return 1;
654    /* fall */
655  case UNICODE_ISO_8859_1:
656    if (c == 0xa1) return 1;
657    if (c == 0xab) return 1;
658    if (c == 0xb7) return 1;
659    if (c == 0xbb) return 1;
660    if (c == 0xbf) return 1;
661    break;
662
663  case ISO_8859_2:
664  case ISO_8859_4:
665  case ISO_8859_5:
666  case ISO_8859_14:
667    if (c == 0xad) return 1;
668    break;
669
670  case ISO_8859_3:
671  case ISO_8859_10:
672    if (c == 0xad) return 1;
673    if (c == 0xb7) return 1;
674    if (c == 0xbd) return 1;
675    break;
676
677  case ISO_8859_6:
678    if (c == 0xac) return 1;
679    if (c == 0xad) return 1;
680    if (c == 0xbb) return 1;
681    if (c == 0xbf) return 1;
682    break;
683
684  case ISO_8859_7:
685    if (c == 0xa1 || c == 0xa2) return 1;
686    if (c == 0xab) return 1;
687    if (c == 0xaf) return 1;
688    if (c == 0xad) return 1;
689    if (c == 0xb7 || c == 0xbb) return 1;
690    break;
691
692  case ISO_8859_8:
693    if (c == 0xab) return 1;
694    if (c == 0xad) return 1;
695    if (c == 0xb7) return 1;
696    if (c == 0xbb) return 1;
697    if (c == 0xdf) return 1;
698    break;
699
700  case ISO_8859_13:
701    if (c == 0xa1 || c == 0xa5) return 1;
702    if (c == 0xab || c == 0xad) return 1;
703    if (c == 0xb4 || c == 0xb7) return 1;
704    if (c == 0xbb) return 1;
705    if (c == 0xff) return 1;
706    break;
707
708  case ISO_8859_16:
709    if (c == 0xa5) return 1;
710    if (c == 0xab) return 1;
711    if (c == 0xad) return 1;
712    if (c == 0xb5) return 1;
713    if (c == 0xb7) return 1;
714    if (c == 0xbb) return 1;
715    break;
716
717  case KOI8_R:
718    if (c == 0x9e) return 1;
719    break;
720
721  case ISO_8859_11:
722  case KOI8:
723    break;
724
725  default:
726    exit(-1);
727  }
728
729  return 0;
730}
731
732static int IsSpace(int enc, int c)
733{
734  if (enc == ASCII)
735    return isspace(c);
736
737  if (c >= 0x09 && c <= 0x0d) return 1;
738  if (c == 0x20) return 1;
739
740  switch (enc) {
741  case UNICODE_ISO_8859_1:
742    if (c == 0x85) return 1;
743    /* fall */
744  case ISO_8859_1:
745  case ISO_8859_2:
746  case ISO_8859_3:
747  case ISO_8859_4:
748  case ISO_8859_5:
749  case ISO_8859_6:
750  case ISO_8859_7:
751  case ISO_8859_8:
752  case ISO_8859_9:
753  case ISO_8859_10:
754  case ISO_8859_11:
755  case ISO_8859_13:
756  case ISO_8859_14:
757  case ISO_8859_15:
758  case ISO_8859_16:
759  case KOI8:
760    if (c == 0xa0) return 1;
761    break;
762
763  case KOI8_R:
764    if (c == 0x9a) return 1;
765    break;
766
767  default:
768    exit(-1);
769  }
770
771  return 0;
772}
773
774static int IsUpper(int enc, int c)
775{
776  if (enc == ASCII)
777    return isupper(c);
778
779  if (c >= 0x41 && c <= 0x5a) return 1;
780
781  switch (enc) {
782  case UNICODE_ISO_8859_1:
783  case ISO_8859_1:
784  case ISO_8859_9:
785    if (c >= 0xc0 && c <= 0xd6) return 1;
786    if (c >= 0xd8 && c <= 0xde) return 1;
787    break;
788
789  case ISO_8859_2:
790    if (c == 0xa1 || c == 0xa3) return 1;
791    if (c == 0xa5 || c == 0xa6) return 1;
792    if (c >= 0xa9 && c <= 0xac) return 1;
793    if (c >= 0xae && c <= 0xaf) return 1;
794    if (c >= 0xc0 && c <= 0xd6) return 1;
795    if (c >= 0xd8 && c <= 0xde) return 1;
796    break;
797
798  case ISO_8859_3:
799    if (c == 0xa1) return 1;
800    if (c == 0xa6) return 1;
801    if (c >= 0xa9 && c <= 0xac) return 1;
802    if (c == 0xaf) return 1;
803    if (c >= 0xc0 && c <= 0xc2) return 1;
804    if (c >= 0xc4 && c <= 0xcf) return 1;
805    if (c >= 0xd1 && c <= 0xd6) return 1;
806    if (c >= 0xd8 && c <= 0xde) return 1;
807    break;
808
809  case ISO_8859_4:
810    if (c == 0xa1 || c == 0xa3) return 1;
811    if (c == 0xa5 || c == 0xa6) return 1;
812    if (c >= 0xa9 && c <= 0xac) return 1;
813    if (c == 0xae) return 1;
814    if (c == 0xbd) return 1;
815    if (c >= 0xc0 && c <= 0xd6) return 1;
816    if (c >= 0xd8 && c <= 0xde) return 1;
817    break;
818
819  case ISO_8859_5:
820    if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
821    break;
822
823  case ISO_8859_6:
824    break;
825
826  case ISO_8859_7:
827    if (c == 0xb6) return 1;
828    if (c >= 0xb8 && c <= 0xba) return 1;
829    if (c == 0xbc) return 1;
830    if (c >= 0xbe && c <= 0xbf) return 1;
831    if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
832    break;
833
834  case ISO_8859_8:
835  case ISO_8859_11:
836    break;
837
838  case ISO_8859_10:
839    if (c >= 0xa1 && c <= 0xa6) return 1;
840    if (c >= 0xa8 && c <= 0xac) return 1;
841    if (c == 0xae || c == 0xaf) return 1;
842    if (c >= 0xc0 && c <= 0xde) return 1;
843    break;
844
845  case ISO_8859_13:
846    if (c == 0xa8) return 1;
847    if (c == 0xaa) return 1;
848    if (c == 0xaf) return 1;
849    if (c >= 0xc0 && c <= 0xd6) return 1;
850    if (c >= 0xd8 && c <= 0xde) return 1;
851    break;
852
853  case ISO_8859_14:
854    if (c == 0xa1) return 1;
855    if (c == 0xa4 || c == 0xa6) return 1;
856    if (c == 0xa8) return 1;
857    if (c == 0xaa || c == 0xac) return 1;
858    if (c == 0xaf || c == 0xb0) return 1;
859    if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1;
860    if (c == 0xbb || c == 0xbd) return 1;
861    if (c >= 0xc0 && c <= 0xde) return 1;
862    break;
863
864  case ISO_8859_15:
865    if (c >= 0xc0 && c <= 0xd6) return 1;
866    if (c >= 0xd8 && c <= 0xde) return 1;
867    if (c == 0xa6) return 1;
868    if (c == 0xb4) return 1;
869    if (c == 0xbc) return 1;
870    if (c == 0xbe) return 1;
871    break;
872
873  case ISO_8859_16:
874    if (c == 0xa1) return 1;
875    if (c == 0xa3) return 1;
876    if (c == 0xa6) return 1;
877    if (c == 0xaa) return 1;
878    if (c == 0xac) return 1;
879    if (c == 0xaf) return 1;
880    if (c == 0xb2) return 1;
881    if (c == 0xb4) return 1;
882    if (c == 0xbc) return 1;
883    if (c == 0xbe) return 1;
884    if (c >= 0xc0 && c <= 0xde) return 1;
885    break;
886
887  case KOI8_R:
888    if (c == 0xb3) return 1;
889    /* fall */
890  case KOI8:
891    if (c >= 0xe0 && c <= 0xff) return 1;
892    break;
893
894  default:
895    exit(-1);
896  }
897
898  return 0;
899}
900
901static int IsXDigit(int enc, int c)
902{
903  if (enc == ASCII)
904    return isxdigit(c);
905
906  if (c >= 0x30 && c <= 0x39) return 1;
907  if (c >= 0x41 && c <= 0x46) return 1;
908  if (c >= 0x61 && c <= 0x66) return 1;
909  return 0;
910}
911
912static int IsWord(int enc, int c)
913{
914  if (enc == ASCII) {
915    return (isalpha(c) || isdigit(c) || c == 0x5f);
916  }
917
918  if (c >= 0x30 && c <= 0x39) return 1;
919  if (c >= 0x41 && c <= 0x5a) return 1;
920  if (c == 0x5f) return 1;
921  if (c >= 0x61 && c <= 0x7a) return 1;
922
923  switch (enc) {
924  case UNICODE_ISO_8859_1:
925  case ISO_8859_1:
926  case ISO_8859_9:
927    if (c == 0xaa) return 1;
928    if (c >= 0xb2 && c <= 0xb3) return 1;
929    if (c == 0xb5) return 1;
930    if (c >= 0xb9 && c <= 0xba) return 1;
931    if (c >= 0xbc && c <= 0xbe) return 1;
932    if (c >= 0xc0 && c <= 0xd6) return 1;
933    if (c >= 0xd8 && c <= 0xf6) return 1;
934    if (c >= 0xf8 && c <= 0xff) return 1;
935    break;
936
937  case ISO_8859_2:
938    if (c == 0xa1 || c == 0xa3) return 1;
939    if (c == 0xa5 || c == 0xa6) return 1;
940    if (c >= 0xa9 && c <= 0xac) return 1;
941    if (c >= 0xae && c <= 0xaf) return 1;
942    if (c == 0xb1 || c == 0xb3) return 1;
943    if (c == 0xb5 || c == 0xb6) return 1;
944    if (c >= 0xb9 && c <= 0xbc) return 1;
945    if (c >= 0xbe && c <= 0xbf) return 1;
946    if (c >= 0xc0 && c <= 0xd6) return 1;
947    if (c >= 0xd8 && c <= 0xf6) return 1;
948    if (c >= 0xf8 && c <= 0xfe) return 1;
949    break;
950
951  case ISO_8859_3:
952    if (c == 0xa1) return 1;
953    if (c == 0xa6) return 1;
954    if (c >= 0xa9 && c <= 0xac) return 1;
955    if (c == 0xaf) return 1;
956    if (c >= 0xb1 && c <= 0xb3) return 1;
957    if (c == 0xb5 || c == 0xb6) return 1;
958    if (c >= 0xb9 && c <= 0xbd) return 1;
959    if (c == 0xbf) return 1;
960    if (c >= 0xc0 && c <= 0xc2) return 1;
961    if (c >= 0xc4 && c <= 0xcf) return 1;
962    if (c >= 0xd1 && c <= 0xd6) return 1;
963    if (c >= 0xd8 && c <= 0xe2) return 1;
964    if (c >= 0xe4 && c <= 0xef) return 1;
965    if (c >= 0xf1 && c <= 0xf6) return 1;
966    if (c >= 0xf8 && c <= 0xfe) return 1;
967    break;
968
969  case ISO_8859_4:
970    if (c >= 0xa1 && c <= 0xa3) return 1;
971    if (c == 0xa5 || c == 0xa6) return 1;
972    if (c >= 0xa9 && c <= 0xac) return 1;
973    if (c == 0xae) return 1;
974    if (c == 0xb1 || c == 0xb3) return 1;
975    if (c == 0xb5 || c == 0xb6) return 1;
976    if (c >= 0xb9 && c <= 0xbf) return 1;
977    if (c >= 0xc0 && c <= 0xd6) return 1;
978    if (c >= 0xd8 && c <= 0xf6) return 1;
979    if (c >= 0xf8 && c <= 0xfe) return 1;
980    break;
981
982  case ISO_8859_5:
983    if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1;
984    if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1;
985    break;
986
987  case ISO_8859_6:
988    if (c >= 0xc1 && c <= 0xda) return 1;
989    if (c >= 0xe0 && c <= 0xea) return 1;
990    if (c >= 0xeb && c <= 0xf2) return 1;
991    break;
992
993  case ISO_8859_7:
994    if (c == 0xb2 || c == 0xb3) return 1;
995    if (c == 0xb6) return 1;
996    if (c >= 0xb8 && c <= 0xba) return 1;
997    if (c >= 0xbc && c <= 0xbf) return 1;
998    if (c == 0xc0) return 1;
999    if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1;
1000    if (c >= 0xdc && c <= 0xfe) return 1;
1001    break;
1002
1003  case ISO_8859_8:
1004    if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
1005    if (c >= 0xbc && c <= 0xbe) return 1;
1006    if (c >= 0xe0 && c <= 0xfa) return 1;
1007    break;
1008
1009  case ISO_8859_10:
1010    if (c >= 0xa1 && c <= 0xff) {
1011      if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd)
1012	return 1;
1013    }
1014    break;
1015
1016  case ISO_8859_11:
1017    if (c >= 0xa1 && c <= 0xda) return 1;
1018    if (c >= 0xdf && c <= 0xfb) return 1;
1019    break;
1020
1021  case ISO_8859_13:
1022    if (c == 0xa8) return 1;
1023    if (c == 0xaa) return 1;
1024    if (c == 0xaf) return 1;
1025    if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1;
1026    if (c >= 0xbc && c <= 0xbe) return 1;
1027    if (c == 0xb8) return 1;
1028    if (c == 0xba) return 1;
1029    if (c >= 0xbf && c <= 0xd6) return 1;
1030    if (c >= 0xd8 && c <= 0xf6) return 1;
1031    if (c >= 0xf8 && c <= 0xfe) return 1;
1032    break;
1033
1034  case ISO_8859_14:
1035    if (c >= 0xa1 && c <= 0xff) {
1036      if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae ||
1037	  c == 0xb6) return 0;
1038      return 1;
1039    }
1040    break;
1041
1042  case ISO_8859_15:
1043    if (c == 0xaa) return 1;
1044    if (c >= 0xb2 && c <= 0xb3) return 1;
1045    if (c == 0xb5) return 1;
1046    if (c >= 0xb9 && c <= 0xba) return 1;
1047    if (c >= 0xbc && c <= 0xbe) return 1;
1048    if (c >= 0xc0 && c <= 0xd6) return 1;
1049    if (c >= 0xd8 && c <= 0xf6) return 1;
1050    if (c >= 0xf8 && c <= 0xff) return 1;
1051    if (c == 0xa6) return 1;
1052    if (c == 0xa8) return 1;
1053    if (c == 0xb4) return 1;
1054    if (c == 0xb8) return 1;
1055    break;
1056
1057  case ISO_8859_16:
1058    if (c == 0xa1) return 1;
1059    if (c == 0xa2) return 1;
1060    if (c == 0xa3) return 1;
1061    if (c == 0xa6) return 1;
1062    if (c == 0xa8) return 1;
1063    if (c == 0xaa) return 1;
1064    if (c == 0xac) return 1;
1065    if (c == 0xae) return 1;
1066    if (c == 0xaf) return 1;
1067    if (c == 0xb2) return 1;
1068    if (c == 0xb3) return 1;
1069    if (c == 0xb4) return 1;
1070    if (c >= 0xb8 && c <= 0xba) return 1;
1071    if (c == 0xbc) return 1;
1072    if (c == 0xbd) return 1;
1073    if (c == 0xbe) return 1;
1074    if (c == 0xbf) return 1;
1075    if (c >= 0xc0 && c <= 0xde) return 1;
1076    if (c >= 0xdf && c <= 0xff) return 1;
1077    break;
1078
1079  case KOI8_R:
1080    if (c == 0x9d) return 1;
1081    if (c == 0xa3 || c == 0xb3) return 1;
1082    /* fall */
1083  case KOI8:
1084    if (c >= 0xc0 && c <= 0xff) return 1;
1085    break;
1086
1087  default:
1088    exit(-1);
1089  }
1090
1091  return 0;
1092}
1093
1094static int IsAscii(int enc ARG_UNUSED, int c)
1095{
1096  if (c >= 0x00 && c <= 0x7f) return 1;
1097  return 0;
1098}
1099
1100static int IsNewline(int enc ARG_UNUSED, int c)
1101{
1102  if (c == 0x0a) return 1;
1103  return 0;
1104}
1105
1106static int exec(FILE* fp, ENC_INFO* einfo)
1107{
1108#define NCOL  8
1109
1110  int c, val, enc;
1111
1112  enc = einfo->num;
1113
1114  fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n",
1115	  einfo->name);
1116
1117  for (c = 0; c < 256; c++) {
1118    val = 0;
1119    if (IsNewline(enc, c))  val |= BIT_CTYPE_NEWLINE;
1120    if (IsAlpha (enc, c))   val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM);
1121    if (IsBlank (enc, c))   val |= BIT_CTYPE_BLANK;
1122    if (IsCntrl (enc, c))   val |= BIT_CTYPE_CNTRL;
1123    if (IsDigit (enc, c))   val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM);
1124    if (IsGraph (enc, c))   val |= BIT_CTYPE_GRAPH;
1125    if (IsLower (enc, c))   val |= BIT_CTYPE_LOWER;
1126    if (IsPrint (enc, c))   val |= BIT_CTYPE_PRINT;
1127    if (IsPunct (enc, c))   val |= BIT_CTYPE_PUNCT;
1128    if (IsSpace (enc, c))   val |= BIT_CTYPE_SPACE;
1129    if (IsUpper (enc, c))   val |= BIT_CTYPE_UPPER;
1130    if (IsXDigit(enc, c))   val |= BIT_CTYPE_XDIGIT;
1131    if (IsWord  (enc, c))   val |= BIT_CTYPE_WORD;
1132    if (IsAscii (enc, c))   val |= BIT_CTYPE_ASCII;
1133
1134    if (c % NCOL == 0) fputs("  ", fp);
1135    fprintf(fp, "0x%04x", val);
1136    if (c != 255) fputs(",", fp);
1137    if (c != 0 && c % NCOL == (NCOL-1))
1138      fputs("\n", fp);
1139    else
1140      fputs(" ", fp);
1141  }
1142  fprintf(fp, "};\n");
1143  return 0;
1144}
1145
1146extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED)
1147{
1148  int i;
1149  FILE* fp = stdout;
1150
1151  setlocale(LC_ALL, "C");
1152  /* setlocale(LC_ALL, "POSIX"); */
1153  /* setlocale(LC_ALL, "en_GB.iso88591"); */
1154  /* setlocale(LC_ALL, "de_BE.iso88591"); */
1155  /* setlocale(LC_ALL, "fr_FR.iso88591"); */
1156
1157  for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) {
1158    exec(fp, &Info[i]);
1159  }
1160
1161  return 0;
1162}
1163