1/* Copyright (C) 1999-2003, 2005, 2011 Free Software Foundation, Inc.
2   This file is part of the GNU LIBICONV Library.
3
4   The GNU LIBICONV Library is free software; you can redistribute it
5   and/or modify it under the terms of the GNU Library General Public
6   License as published by the Free Software Foundation; either version 2
7   of the License, or (at your option) any later version.
8
9   The GNU LIBICONV Library is distributed in the hope that it will be
10   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Library General Public License for more details.
13
14   You should have received a copy of the GNU Library General Public
15   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16   If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
17   Fifth Floor, Boston, MA 02110-1301, USA.  */
18
19/*
20 * Generates a table of small strings, used for transliteration, from a table
21 * containing lines of the form
22 *   Unicode <tab> utf-8 replacement <tab> # comment
23 */
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <stdbool.h>
28
/* Fixed capacities and layout constants used by the generator. */
enum {
  UNICODE_LIMIT  = 0x110000,  /* code points handled are 0 .. UNICODE_LIMIT-1 */
  DATA_CAPACITY  = 0x100000,  /* maximum number of translit_data elements */
  LINE_WIDTH     = 8,         /* code points per "line" of a page table */
  LINE_COUNT     = UNICODE_LIMIT / LINE_WIDTH,
  PAGE_SHIFT     = 5,         /* line number >> PAGE_SHIFT = 256-code-point page */
  LINES_PER_PAGE = 1 << PAGE_SHIFT,
  MAX_LINE_GAP   = 8,         /* lines this close (same page) share one table */
  TABLE_CAPACITY = 0x2000,    /* maximum number of page tables */
  SUFFIX_SIZE    = 24         /* room for "%02x_%d" with any int values */
};

/* A contiguous range of lines that is emitted as one lookup table (or, when
   it contains a single mapping, as a single comparison in the macro). */
struct table {
  int minline;              /* first line covered */
  int maxline;              /* last line covered (inclusive) */
  int usecount;             /* number of code points in range with a mapping */
  char suffix[SUFFIX_SIZE]; /* name suffix of translit_page<suffix>; empty
                               when usecount <= 1 (no array is emitted) */
};

/* These tables total several megabytes, so they live in static storage
   rather than on the stack of main().  */

/* Replacement strings: each entry is a length element followed by that many
   code points. */
static unsigned int data[DATA_CAPACITY];
static int data_len;
/* For each code point, the index in data[] of its length element, or -1. */
static int uni2index[UNICODE_LIMIT];
/* For each line of LINE_WIDTH code points, the table it belongs to, or -1. */
static int line[LINE_COUNT];
static struct table tables[TABLE_CAPACITY];
static int tableno;

/* Reports a fatal error on stderr and terminates with exit status 1. */
static void die (const char *msg)
{
  fprintf(stderr, "error: %s\n", msg);
  exit(1);
}

/* Appends one element to data[], failing cleanly when the table is full. */
static void append_data (unsigned int value)
{
  if (data_len >= DATA_CAPACITY)
    die("transliteration data table is full");
  data[data_len++] = value;
}

/* Consumes stdin up to and including the next newline (or up to EOF). */
static void skip_rest_of_line (void)
{
  int c;
  do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
}

/* Writes the license header and title comment of the generated file. */
static void print_header (void)
{
  printf("/*\n");
  printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
  printf(" * This file is part of the GNU LIBICONV Library.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  printf(" * and/or modify it under the terms of the GNU Library General Public\n");
  printf(" * License as published by the Free Software Foundation; either version 2\n");
  printf(" * of the License, or (at your option) any later version.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n");
  printf(" * Library General Public License for more details.\n");
  printf(" *\n");
  printf(" * You should have received a copy of the GNU Library General Public\n");
  printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  printf(" * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n");
  printf(" * Fifth Floor, Boston, MA 02110-1301, USA.\n");
  printf(" */\n");
  printf("\n");
  printf("/*\n");
  printf(" * Transliteration table\n");
  printf(" */\n");
  printf("\n");
}

/* Parses stdin into data[] and uni2index[].
   Each non-comment line is: hexadecimal code point, TAB, replacement text in
   UTF-8, TAB, then anything up to the newline (ignored).  Lines whose first
   character is '#' are skipped.  Any malformed line terminates the program.
   A line with an empty replacement creates no entry.
   NOTE: a code point that appears on more than one line is not detected; its
   entry is not restarted, so its recorded length would span later entries. */
static void read_input (void)
{
  int j;

  for (j = 0; j < UNICODE_LIMIT; j++)
    uni2index[j] = -1;
  data_len = 0;

  for (;;) {
    unsigned int code;
    int c = getc(stdin);

    if (c == EOF)
      break;
    if (c == '#') {
      skip_rest_of_line();
      continue;
    }
    ungetc(c, stdin);

    /* %x stores through an unsigned int pointer. */
    if (scanf("%x", &code) != 1)
      die("expected a hexadecimal code point at start of line");
    /* Reject anything that would index outside uni2index[]. */
    if (code >= (unsigned int) UNICODE_LIMIT)
      die("code point out of range");
    j = (int) code;

    if (getc(stdin) != '\t')
      die("expected a tab after the code point");

    for (;;) {
      unsigned int wc;

      c = getc(stdin);
      if (c == EOF || c == '\n')
        die("expected a tab after the replacement text");
      if (c == '\t')
        break;

      /* The first replacement character opens the entry: reserve the
         length element, which is filled in once the line is complete. */
      if (uni2index[j] < 0) {
        uni2index[j] = data_len;
        append_data(0);
      }

      wc = (unsigned int) c;
      if (c >= 0x80) {
        /* Finish reading an UTF-8 character.  Lead bytes 0xf8 and above are
           accepted as 5- and 6-byte forms.  The value is accumulated in an
           unsigned variable so the shifts cannot overflow a signed int. */
        unsigned int n;
        if (c < 0xc0)
          die("unexpected UTF-8 continuation byte");
        n = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
        wc &= (1u << (8 - n)) - 1;
        while (--n > 0) {
          int cc = getc(stdin);
          if (!(cc >= 0x80 && cc < 0xc0))
            die("truncated or malformed UTF-8 sequence");
          wc = (wc << 6) | ((unsigned int) cc & 0x3f);
        }
      }
      append_data(wc);
    }

    if (uni2index[j] >= 0)
      data[uni2index[j]] = (unsigned int) (data_len - uni2index[j] - 1);
    skip_rest_of_line();
  }
}

/* Emits the translit_data[] array.  Elements below 32 (in practice the
   length elements) start a new output line; printable ASCII is written as a
   character constant, everything else in hexadecimal. */
static void print_data (void)
{
  int i;

  printf("static const unsigned int translit_data[%d] = {", data_len);
  for (i = 0; i < data_len; i++) {
    unsigned int v = data[i];
    if (v < 32)
      printf("\n %3u,", v);
    else if (v == '\'')
      printf("'\\'',");
    else if (v == '\\')
      printf("'\\\\',");
    else if (v < 127)
      printf(" '%c',", (int) v);
    else if (v < 256)
      printf("0x%02X,", v);
    else
      printf("0x%04X,", v);
  }
  printf("\n};\n");
}

/* Groups the non-empty lines into tables and fills in line[], tables[] and
   tableno.  A line joins the current table when it directly follows that
   table's last line, or when it lies in the same page and at most
   MAX_LINE_GAP lines after it; empty lines inside such a gap keep
   line[] == -1.  Then counts the mappings per table and names every table
   that will be emitted as an array (usecount > 1). */
static void assign_tables (void)
{
  int j, j1, j2, t;
  int p, i;

  /* Mark each line as empty (-1) or used (provisionally 0). */
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    bool all_invalid = true;
    for (j2 = 0; j2 < LINE_WIDTH; j2++)
      if (uni2index[LINE_WIDTH*j1 + j2] >= 0)
        all_invalid = false;
    line[j1] = (all_invalid ? -1 : 0);
  }

  tableno = 0;
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    if (line[j1] < 0)
      continue;
    if (tableno > 0
        && ((j1 > 0 && line[j1-1] == tableno-1)
            || ((tables[tableno-1].maxline >> PAGE_SHIFT) == (j1 >> PAGE_SHIFT)
                && j1 - tables[tableno-1].maxline <= MAX_LINE_GAP))) {
      line[j1] = tableno-1;
      tables[tableno-1].maxline = j1;
    } else {
      if (tableno >= TABLE_CAPACITY)
        die("too many page tables");
      tables[tableno].minline = tables[tableno].maxline = j1;
      line[j1] = tableno;
      tableno++;
    }
  }

  for (t = 0; t < tableno; t++) {
    int first = LINE_WIDTH * tables[t].minline;
    int limit = LINE_WIDTH * (tables[t].maxline + 1);
    tables[t].usecount = 0;
    for (j = first; j < limit; j++)
      if (uni2index[j] >= 0)
        tables[t].usecount++;
  }

  /* The first emitted table of a page is named after the page number alone;
     further tables of the same page get "_<counter>" appended.  The counter
     runs across the whole file and is not reset per page. */
  p = -1;
  i = 0;
  for (t = 0; t < tableno; t++) {
    if (tables[t].usecount > 1) {
      if (p == tables[t].minline >> PAGE_SHIFT) {
        snprintf(tables[t].suffix, sizeof tables[t].suffix,
                 "%02x_%d", (unsigned int) p, ++i);
      } else {
        p = tables[t].minline >> PAGE_SHIFT;
        snprintf(tables[t].suffix, sizeof tables[t].suffix,
                 "%02x", (unsigned int) p);
      }
    } else
      tables[t].suffix[0] = '\0';
  }
}

/* Emits one translit_page<suffix>[] array for every table holding more than
   one mapping, followed by a blank line if at least one array was emitted. */
static void print_pages (void)
{
  bool emitted = false;
  int t, j1, j2;

  for (t = 0; t < tableno; t++) {
    if (tables[t].usecount <= 1)
      continue;
    emitted = true;
    printf("static const short translit_page%s[%d] = {\n", tables[t].suffix,
           LINE_WIDTH * (tables[t].maxline - tables[t].minline + 1));
    for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
      unsigned int low = (unsigned int) (LINE_WIDTH * (j1 % LINES_PER_PAGE));
      /* Mark the start of each further page inside a multi-page table. */
      if ((j1 % LINES_PER_PAGE) == 0 && j1 > tables[t].minline)
        printf("  /* 0x%04x */\n", (unsigned int) (LINE_WIDTH * j1));
      printf(" ");
      for (j2 = 0; j2 < LINE_WIDTH; j2++)
        printf(" %4d,", uni2index[LINE_WIDTH*j1 + j2]);
      printf(" /* 0x%02x-0x%02x */\n", low, low + 7);
    }
    printf("};\n");
  }
  if (emitted)
    printf("\n");
}

/* Emits the translit_index(wc) macro: a chain of conditional expressions,
   one per table, ending in -1.  A table with a single mapping becomes an
   equality test; any other table becomes a range test plus array lookup.
   Inconsistencies between line[] and tables[] abort the program. */
static void print_index_macro (void)
{
  int j, j1, j2, t;

  printf("#define translit_index(wc) \\\n  (");
  for (j1 = 0; j1 < LINE_COUNT; j1 = j2) {
    t = line[j1];
    /* Find the end of the run of lines carrying the same value. */
    for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++)
      ;
    if (t < 0)
      continue;

    if (j1 != tables[t].minline) abort();
    if (j2 > tables[t].maxline+1) abort();
    /* Skip over empty lines that lie inside the table's range. */
    j2 = tables[t].maxline+1;

    if (tables[t].usecount == 0) abort();
    if (tables[t].usecount == 1) {
      if (j2 != j1+1) abort();
      for (j = LINE_WIDTH*j1; j < LINE_WIDTH*j2; j++)
        if (uni2index[j] >= 0) {
          printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
          break;
        }
    } else {
      if (j1 == 0)
        printf("wc < 0x%04x", (unsigned int) (LINE_WIDTH*j2));
      else
        printf("wc >= 0x%04x && wc < 0x%04x",
               (unsigned int) (LINE_WIDTH*j1), (unsigned int) (LINE_WIDTH*j2));
      printf(" ? translit_page%s[wc", tables[t].suffix);
      if (tables[t].minline > 0)
        printf("-0x%04x", (unsigned int) (LINE_WIDTH*j1));
      printf("]");
    }
    printf(" : \\\n   ");
  }
  printf("-1)\n");
}

/* Reads the transliteration definitions from stdin and writes the generated
   C tables to stdout.  Takes no command-line arguments.  Exits with status 0
   on success and 1 on a usage, input, capacity or output error. */
int main (int argc, char *argv[])
{
  (void) argv;

  if (argc != 1)
    die("no command-line arguments expected; reads stdin, writes stdout");

  print_header();
  read_input();
  print_data();
  printf("\n");
  assign_tables();
  print_pages();
  print_index_macro();

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  exit(0);
}
259