/* Copyright (C) 1999-2003, 2005, 2011 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Library.

   The GNU LIBICONV Library is free software; you can redistribute it
   and/or modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either version 2
   of the License, or (at your option) any later version.

   The GNU LIBICONV Library is distributed in the hope that it will be
   useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU LIBICONV Library; see the file COPYING.LIB.
   If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
   Fifth Floor, Boston, MA 02110-1301, USA.  */

/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 *   Unicode <tab> utf-8 replacement <tab> # comment
 *
 * The input is read from standard input and the generated C source is
 * written to standard output.  On malformed input a diagnostic is written
 * to standard error and the program exits with status 1.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

enum {
  DATA_CAPACITY   = 0x100000, /* number of elements in data[] */
  CODEPOINT_COUNT = 0x110000, /* number of elements in uni2index[] */
  LINE_COUNT      = 0x22000,  /* CODEPOINT_COUNT / 8; a "line" is 8 code points */
  TABLE_CAPACITY  = 0x2000,   /* number of elements in tables[] */
  SUFFIX_SIZE     = 16        /* room for "%02x_%d" of a page number and a counter */
};

/* A contiguous range of lines, minline..maxline inclusive, that is emitted
   as one translit_page array (or as a single comparison if usecount == 1). */
struct table {
  int minline;
  int maxline;
  int usecount;               /* number of code points in the range that have an entry */
  char suffix[SUFFIX_SIZE];   /* identifier suffix; empty unless usecount > 1 */
};

/* These arrays are far too large for automatic storage (several megabytes
   in total), so they have static storage duration. */

/* For every code point with an entry: one length word followed by that many
   replacement code points. */
static unsigned int data[DATA_CAPACITY];
/* Number of elements of data[] in use. */
static int data_count;
/* Maps a code point to the position of its length word in data[], or -1. */
static int uni2index[CODEPOINT_COUNT];
/* Maps a line to the table it starts or extends, or -1 if the line has no
   entries. */
static int line[LINE_COUNT];
static struct table tables[TABLE_CAPACITY];

/* Reports MSG on standard error and terminates with exit status 1. */
static void fail (const char *msg)
{
  fprintf(stderr, "error: %s\n", msg);
  exit(1);
}

/* Consumes standard input up to and including the next newline, or EOF. */
static void skip_line (void)
{
  int c;
  do {
    c = getc(stdin);
  } while (!(c == EOF || c == '\n'));
}

/* Appends VALUE to data[], failing instead of writing past the array. */
static void append_data (unsigned int value)
{
  if (data_count >= DATA_CAPACITY)
    fail("too much replacement data");
  data[data_count++] = value;
}

/* Given the already-read lead byte LEAD (>= 0x80) of a UTF-8 sequence, reads
   the continuation bytes from standard input and returns the decoded value.
   The value is accumulated as unsigned so that the shifts cannot overflow a
   signed int for the longest (6-byte) sequences. */
static unsigned int finish_utf8 (int lead)
{
  unsigned int len;
  unsigned int value;

  if (lead < 0xc0)
    fail("unexpected UTF-8 continuation byte");
  len = (lead < 0xe0 ? 2 : lead < 0xf0 ? 3 : lead < 0xf8 ? 4 : lead < 0xfc ? 5 : 6);
  value = (unsigned int) lead & ((1u << (8-len)) - 1);
  while (--len > 0) {
    int cc = getc(stdin);
    if (!(cc >= 0x80 && cc < 0xc0))
      fail("invalid UTF-8 continuation byte");
    value <<= 6;
    value |= ((unsigned int) cc & 0x3f);
  }
  return value;
}

/* Emits the license header and the title comment of the generated file. */
static void print_header (void)
{
  printf("/*\n");
  printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
  printf(" * This file is part of the GNU LIBICONV Library.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  printf(" * and/or modify it under the terms of the GNU Library General Public\n");
  printf(" * License as published by the Free Software Foundation; either version 2\n");
  printf(" * of the License, or (at your option) any later version.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
  printf(" * Library General Public License for more details.\n");
  printf(" *\n");
  printf(" * You should have received a copy of the GNU Library General Public\n");
  printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  printf(" * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n");
  printf(" * Fifth Floor, Boston, MA 02110-1301, USA.\n");
  printf(" */\n");
  printf("\n");
  printf("/*\n");
  printf(" * Transliteration table\n");
  printf(" */\n");
  printf("\n");
}

/* Parses standard input into data[] and uni2index[].  Lines starting with
   '#' are skipped; every other line must be
     <hex code point> TAB <UTF-8 replacement> TAB <rest of line>.  */
static void read_input (void)
{
  int j;

  for (j = 0; j < CODEPOINT_COUNT; j++)
    uni2index[j] = -1;
  data_count = 0;

  for (;;) {
    unsigned int code;
    int c = getc(stdin);

    if (c == EOF)
      break;
    if (c == '#') {
      skip_line();
      continue;
    }
    ungetc(c,stdin);
    /* %x requires an unsigned int object. */
    if (scanf("%x",&code) != 1)
      fail("expected a hexadecimal code point");
    /* The code point is used as an array index below, so reject anything
       that does not fit into uni2index[]. */
    if (code >= (unsigned int) CODEPOINT_COUNT)
      fail("code point out of range");
    j = (int) code;
    c = getc(stdin);
    if (c != '\t')
      fail("expected a tab after the code point");

    for (;;) {
      unsigned int value;

      c = getc(stdin);
      if (c == EOF || c == '\n')
        fail("expected a tab after the replacement");
      if (c == '\t')
        break;
      if (uni2index[j] < 0) {
        /* First replacement character for this code point: reserve the
           length word, filled in once the replacement is complete. */
        uni2index[j] = data_count;
        append_data(0);
      }
      value = (unsigned int) c;
      if (c >= 0x80)
        value = finish_utf8(c);
      append_data(value);
    }
    if (uni2index[j] >= 0)
      data[uni2index[j]] = data_count - uni2index[j] - 1;
    skip_line();
  }
}

/* Emits the translit_data array.  Values below 32 start a new output line;
   printable ASCII values are written as character constants. */
static void print_data (void)
{
  int i;

  printf("static const unsigned int translit_data[%d] = {",data_count);
  for (i = 0; i < data_count; i++) {
    if (data[i] < 32)
      printf("\n %3d,",(int) data[i]);
    else if (data[i] == '\'')
      printf("'\\'',");
    else if (data[i] == '\\')
      printf("'\\\\',");
    else if (data[i] < 127)
      printf(" '%c',",(int) data[i]);
    else if (data[i] < 256)
      printf("0x%02X,",data[i]);
    else
      printf("0x%04X,",data[i]);
  }
  printf("\n};\n");
}

/* Groups the lines that contain entries into tables, counts the entries of
   each table and assigns identifier suffixes.  Returns the number of
   tables. */
static int build_tables (void)
{
  int tableno = 0;
  int i, j, p, j1, j2, t;

  /* Mark every line as either empty (-1) or not yet assigned (0). */
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    bool all_invalid = true;
    for (j2 = 0; j2 < 8; j2++) {
      j = 8*j1+j2;
      if (uni2index[j] >= 0)
        all_invalid = false;
    }
    line[j1] = (all_invalid ? -1 : 0);
  }

  /* A non-empty line extends the most recent table if it directly follows a
     line of that table, or if it lies in the same group of 32 lines as the
     table's last line and at most 8 lines after it.  Otherwise it starts a
     new table. */
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    if (line[j1] < 0)
      continue;
    if (tableno > 0
        && ((j1 > 0 && line[j1-1] == tableno-1)
            || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                && j1 - tables[tableno-1].maxline <= 8))) {
      tables[tableno-1].maxline = j1;
    } else {
      if (tableno >= TABLE_CAPACITY)
        fail("too many tables");
      tables[tableno].minline = tables[tableno].maxline = j1;
      tableno++;
    }
    line[j1] = tableno-1;
  }

  for (t = 0; t < tableno; t++) {
    tables[t].usecount = 0;
    j1 = 8*tables[t].minline;
    j2 = 8*(tables[t].maxline+1);
    for (j = j1; j < j2; j++)
      if (uni2index[j] >= 0)
        tables[t].usecount++;
  }

  /* Tables with more than one entry get a suffix made of the number of the
     32-line group they start in; a further table starting in the same group
     additionally gets "_<n>", where n is a counter that is never reset. */
  for (t = 0, p = -1, i = 0; t < tableno; t++) {
    tables[t].suffix[0] = '\0';
    if (tables[t].usecount > 1) {
      if (p == tables[t].minline >> 5) {
        snprintf(tables[t].suffix, SUFFIX_SIZE, "%02x_%d", (unsigned int) p, ++i);
      } else {
        p = tables[t].minline >> 5;
        snprintf(tables[t].suffix, SUFFIX_SIZE, "%02x", (unsigned int) p);
      }
    }
  }
  return tableno;
}

/* Emits one translit_page array for every table with more than one entry,
   followed by a blank line if at least one array was emitted. */
static void print_pages (int tableno)
{
  bool any = false;
  int j, j1, j2, t;

  for (t = 0; t < tableno; t++) {
    if (tables[t].usecount <= 1)
      continue;
    any = true;
    printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
    for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
      if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
        printf(" /* 0x%04x */\n", (unsigned int) (8*j1));
      printf(" ");
      for (j2 = 0; j2 < 8; j2++) {
        j = 8*j1+j2;
        printf(" %4d,", uni2index[j]);
      }
      printf(" /* 0x%02x-0x%02x */\n", (unsigned int) (8*(j1 % 0x20)), (unsigned int) (8*(j1 % 0x20)+7));
    }
    printf("};\n");
  }
  if (any)
    printf("\n");
}

/* Emits the translit_index(wc) macro: a chain of conditional expressions,
   one per table, ending in -1.  The abort() calls are internal consistency
   checks on the result of build_tables(). */
static void print_index_macro (void)
{
  int j, j1, j2, t;

  printf("#define translit_index(wc) \\\n (");
  for (j1 = 0; j1 < LINE_COUNT;) {
    t = line[j1];
    for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++)
      ;
    if (t >= 0) {
      if (j1 != tables[t].minline) abort();
      if (j2 > tables[t].maxline+1) abort();
      /* Empty lines inside a table belong to it as well. */
      j2 = tables[t].maxline+1;
      if (tables[t].usecount == 0) abort();
      if (tables[t].usecount == 1) {
        /* A single entry is tested for directly, without a page array. */
        if (j2 != j1+1) abort();
        for (j = 8*j1; j < 8*j2; j++)
          if (uni2index[j] >= 0) {
            printf("wc == 0x%04x ? %d", (unsigned int) j, uni2index[j]);
            break;
          }
      } else {
        if (j1 == 0) {
          printf("wc < 0x%04x", (unsigned int) (8*j2));
        } else {
          printf("wc >= 0x%04x && wc < 0x%04x", (unsigned int) (8*j1), (unsigned int) (8*j2));
        }
        printf(" ? translit_page%s[wc", tables[t].suffix);
        if (tables[t].minline > 0)
          printf("-0x%04x", (unsigned int) (8*j1));
        printf("]");
      }
      printf(" : \\\n ");
    }
    j1 = j2;
  }
  printf("-1)\n");
}

/* Entry point.  Takes no command-line arguments; exits with status 0 on
   success and 1 on a usage error, malformed input or an output error. */
int main (int argc, char *argv[])
{
  int tableno;

  (void) argv;
  if (argc != 1)
    exit(1);

  print_header();
  read_input();
  print_data();
  printf("\n");
  tableno = build_tables();
  print_pages(tableno);
  print_index_macro();

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  exit(0);
}