• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-R7000-V1.0.7.12_1.2.5/ap/gpl/timemachine/netatalk-2.2.5/contrib/shell_utils/
1#!/usr/bin/perl
2#
3# usage: make-precompose.h.pl UnicodeData.txt > precompose.h
4#
5# (c) 2008-2011 by HAT <hat@fa2.so-net.ne.jp>
6#
7#  This program is free software; you can redistribute it and/or modify
8#  it under the terms of the GNU General Public License as published by
9#  the Free Software Foundation; either version 2 of the License, or
10#  (at your option) any later version.
11#
12#  This program is distributed in the hope that it will be useful,
13#  but WITHOUT ANY WARRANTY; without even the implied warranty of
14#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15#  GNU General Public License for more details.
16#
17
18# See
19# http://www.unicode.org/Public/UNIDATA/UCD.html
20# http://www.unicode.org/reports/tr15/
21# http://www.unicode.org/Public/*/ucd/UnicodeData*.txt
22# http://www.unicode.org/Public/UNIDATA/UnicodeData.txt
23
24
25# temp files for binary search (compose.TEMP, compose_sp.TEMP) -------------
26
# Pass 1: read UnicodeData.txt (first command-line argument) and write one
# candidate C initializer row for every character whose decomposition mapping
# is non-empty, carries no "<tag>" and contains a space (i.e. lists more than
# one code point).
#
#   compose.TEMP     every such row, values printed as 0x%08X
#   compose_sp.TEMP  rows for code points above U+FFFF only, each value
#                    printed as a UTF-16 surrogate pair (0xHHHHLLLL)
#
# A row that must not become an active table entry is written inside a C
# comment ("/*{ ... },*/"); the later passes skip rows starting with "/*".
# The column layout must not change: later passes pick fields out by offset.

die "usage: $0 UnicodeData.txt > precompose.h\n" unless @ARGV;

open(UNICODEDATA, "<", $ARGV[0])
    or die "$0: cannot open $ARGV[0] for reading: $!\n";

open(COMPOSE_TEMP, ">", "compose.TEMP")
    or die "$0: cannot open compose.TEMP for writing: $!\n";
open(COMPOSE_SP_TEMP, ">", "compose_sp.TEMP")
    or die "$0: cannot open compose_sp.TEMP for writing: $!\n";

# Split a code point into its (high, low) UTF-16 surrogate halves.
my $to_surrogates = sub {
    my ($cp) = @_;
    return (0xD800 - (0x10000 >> 10) + ($cp >> 10),
            0xDC00 + ($cp & 0x3FF));
};

while (my $line = <UNICODEDATA>) {
    # chomp, not chop: strip only a line terminator, never a data character
    # (matters when the last line of the file has no trailing newline).
    chomp($line);

    # Only three of the semicolon-separated fields are used:
    # 0 = code point, 1 = character name, 5 = decomposition mapping.
    my ($code0, $Name1, $Decomposition_Mapping5) = (split(/;/, $line))[0, 1, 5];

    next unless defined $Decomposition_Mapping5;
    next if $Decomposition_Mapping5 eq "";
    next if $Decomposition_Mapping5 =~ /</;     # tagged mapping
    next if $Decomposition_Mapping5 !~ / /;     # single code point mapping

    my ($base, $comb) = split(/ /, $Decomposition_Mapping5);
    my $cp = hex($code0);

    # Active row by default; both bracket variants have the same width.
    my ($leftbracket, $rightbracket) = ("  { ", " },     ");

    # AFP 3.x Spec: rows in these ranges are emitted commented out.
    if (   ((0x2000  <= $cp) && ($cp <=  0x2FFF))
        || ((0xFE30  <= $cp) && ($cp <=  0xFE4F))
        || ((0x2F800 <= $cp) && ($cp <= 0x2FA1F))) {
        ($leftbracket, $rightbracket) = ("/*{ ", " },*/   ");
    }

    if ($cp > 0xFFFF) {
        printf(COMPOSE_SP_TEMP "%s0x%04X%04X, 0x%04X%04X, 0x%04X%04X%s/* %s */\n",
               $leftbracket,
               $to_surrogates->($cp),
               $to_surrogates->(hex($base)),
               $to_surrogates->(hex($comb)),
               $rightbracket, $Name1);

        # The character now lives in the surrogate-pair table, so its row in
        # compose.TEMP is always written commented out.
        ($leftbracket, $rightbracket) = ("/*{ ", " },*/   ");
    }

    printf(COMPOSE_TEMP "%s0x%08X, 0x%08X, 0x%08X%s/* %s */\n",
           $leftbracket, $cp, hex($base), hex($comb), $rightbracket, $Name1);
}

close(UNICODEDATA);

# Closing flushes buffered rows; a failure here means a truncated temp file.
close(COMPOSE_TEMP)
    or die "$0: error writing compose.TEMP: $!\n";
close(COMPOSE_SP_TEMP)
    or die "$0: error writing compose_sp.TEMP: $!\n";
93
94# macros for BMP (PRECOMP_COUNT, DECOMP_COUNT, MAXCOMBLEN) ----------------
95
# Pass 2: load the active (not commented-out) rows of compose.TEMP and derive
#   $comp_count  number of active rows (printed as PRECOMP_COUNT/DECOMP_COUNT)
#   $maxcomblen  longest chain of successive decompositions (MAXCOMBLEN)

open(COMPOSE_TEMP, "<", "compose.TEMP")
    or die "$0: cannot open compose.TEMP for reading: $!\n";

@comp_table = ();
$comp_count = 0;
my %bmp_base_of;    # replacement => base, both kept as "0xXXXXXXXX" text

while (my $row = <COMPOSE_TEMP>) {
    # Rows wrapped in a C comment are excluded from the tables.
    next if $row =~ m{^/\*};

    # Fixed columns: 4-char bracket, replacement (10), ", " (2), base (10).
    my $replacement = substr($row, 4, 10);
    my $row_base    = substr($row, 16, 10);

    $comp_table[$comp_count][0] = $replacement;
    $comp_table[$comp_count][1] = $row_base;
    $comp_count++;

    # Keep the first row seen per replacement, which is what a front-to-back
    # scan of the table would find.
    $bmp_base_of{$replacement} = $row_base
        unless exists $bmp_base_of{$replacement};
}

close(COMPOSE_TEMP);

$maxcomblen = 2;      # Hangul already requires 2 (V and T).

# For each row, follow base -> (row whose replacement is that base) -> ...
# until the base is not itself a replacement, counting the steps.  The hash
# lookup replaces rescanning the whole table from the start at every step.
for my $entry (@comp_table) {
    my $base    = $entry->[1];
    my $comblen = 1;
    while (exists $bmp_base_of{$base}) {
        $base = $bmp_base_of{$base};
        $comblen++;
    }
    $maxcomblen = $comblen if $comblen > $maxcomblen;
}
130
131# macros for SP (PRECOMP_SP_COUNT,DECOMP_SP_COUNT, MAXCOMBSPLEN) -----------
132
# Pass 3: load the active (not commented-out) rows of compose_sp.TEMP and
# derive
#   $comp_sp_count  number of active rows (PRECOMP_SP_COUNT/DECOMP_SP_COUNT)
#   $maxcombsplen   longest decomposition chain, counted in 16-bit units
#                   (MAXCOMBSPLEN)

open(COMPOSE_SP_TEMP, "<", "compose_sp.TEMP")
    or die "$0: cannot open compose_sp.TEMP for reading: $!\n";

@comp_sp_table = ();
$comp_sp_count = 0;
my %sp_base_of;     # replacement => base, both kept as "0xHHHHLLLL" text

while (my $row = <COMPOSE_SP_TEMP>) {
    # Rows wrapped in a C comment are excluded from the tables.
    next if $row =~ m{^/\*};

    # Fixed columns: 4-char bracket, replacement (10), ", " (2), base (10).
    my $replacement = substr($row, 4, 10);
    my $row_base    = substr($row, 16, 10);

    $comp_sp_table[$comp_sp_count][0] = $replacement;
    $comp_sp_table[$comp_sp_count][1] = $row_base;
    $comp_sp_count++;

    # Keep the first row seen per replacement, which is what a front-to-back
    # scan of the table would find.
    $sp_base_of{$replacement} = $row_base
        unless exists $sp_base_of{$replacement};
}

close(COMPOSE_SP_TEMP);

$maxcombsplen = 2;     # one character is two code units, e.g. D8xx DCxx.

# Same chain walk as for the BMP table, but every step adds 2 because each
# supplementary-plane character occupies a surrogate pair.
for my $entry (@comp_sp_table) {
    my $base_sp = $entry->[1];
    my $comblen = 2;
    while (exists $sp_base_of{$base_sp}) {
        $base_sp = $sp_base_of{$base_sp};
        $comblen += 2;
    }
    $maxcombsplen = $comblen if $comblen > $maxcombsplen;
}
167
168# macro for buffer length (COMBBUFLEN) -------------------------------------
169
# COMBBUFLEN is the larger of the BMP and supplementary-plane maxima.
$combbuflen = $maxcombsplen;
if ($maxcomblen > $maxcombsplen) {
    $combbuflen = $maxcomblen;
}
171
172# sort ---------------------------------------------------------------------
173
# Produce the two orderings of each temp file.  Rows are blank-separated as
#   <bracket> <replacement>, <base>, <comb> ...
# so "-k 3" orders by base (then the rest of the line, i.e. comb) and "-k 2"
# orders by replacement.
{
    # Force byte-wise collation: the keys are fixed-width upper-case hex, so
    # in the C locale string order equals numeric order regardless of the
    # locale of whoever runs the generator.
    local $ENV{LC_ALL} = "C";

    my @sort_jobs = (
        [ 3, "compose.TEMP",    "precompose.SORT"    ],
        [ 2, "compose.TEMP",    "decompose.SORT"     ],
        [ 3, "compose_sp.TEMP", "precompose_sp.SORT" ],
        [ 2, "compose_sp.TEMP", "decompose_sp.SORT"  ],
    );

    for my $job (@sort_jobs) {
        my ($key, $input, $output) = @$job;
        my $command = "sort -k $key $input > $output";

        # A failed sort would otherwise leave an empty or stale table behind.
        system($command) == 0
            or die "$0: '$command' failed (wait status $?)\n";
    }
}
179
180# print  -------------------------------------------------------------------
181
# Emit the generated C header on STDOUT: banner, fixed constants, table-size
# macros computed by the earlier passes, then the four sorted tables.

# Copy a file of pre-formatted table rows to STDOUT.  This is done in-process
# rather than through an external "cat" so that the rows go through the same
# output buffer as the surrounding print statements (their relative order no
# longer depends on Perl flushing before it forks) and so that a missing
# *.SORT file is reported instead of silently producing an empty table.
my $emit_rows = sub {
    my ($path) = @_;
    open(my $rows, "<", $path)
        or die "$0: cannot open $path for reading: $!\n";
    while (my $row = <$rows>) {
        print($row);
    }
    close($rows);
};

print ("\/\* DO NOT EDIT BY HAND\!\!\!                                           \*\/\n");
print ("\/\* This file is generated by                                        \*\/\n");
printf ("\/\*       contrib/shell_utils/make-precompose.h.pl %s   \*\/\n", $ARGV[0]);
print ("\n");
printf ("\/\* %s is got from                                      \*\/\n", $ARGV[0]);
print ("\/\* http\:\/\/www.unicode.org\/Public\/UNIDATA\/UnicodeData.txt            \*\/\n");
print ("\n");

# Fixed constants; they do not depend on the input file.
print ("\#define SBASE 0xAC00\n");
print ("\#define LBASE 0x1100\n");
print ("\#define VBASE 0x1161\n");
print ("\#define TBASE 0x11A7\n");
print ("\#define LCOUNT 19\n");
print ("\#define VCOUNT 21\n");
print ("\#define TCOUNT 28\n");
print ("\#define NCOUNT 588     \/\* (VCOUNT \* TCOUNT) \*\/\n");
print ("\#define SCOUNT 11172   \/\* (LCOUNT \* NCOUNT) \*\/\n");
print ("\n");

# Both tables of a pair hold the same rows in different order, hence the
# same count is printed twice.
printf ("\#define PRECOMP_COUNT %d\n", $comp_count);
printf ("\#define DECOMP_COUNT %d\n", $comp_count);
printf ("\#define MAXCOMBLEN %d\n", $maxcomblen);
print ("\n");
printf ("\#define PRECOMP_SP_COUNT %d\n", $comp_sp_count);
printf ("\#define DECOMP_SP_COUNT %d\n", $comp_sp_count);
printf ("\#define MAXCOMBSPLEN %d\n", $maxcombsplen);
print ("\n");
printf ("\#define COMBBUFLEN %d  \/\* max\(MAXCOMBLEN\,MAXCOMBSPLEN\) \*\/\n", $combbuflen);
print ("\n");

print ("static const struct \{\n");
print ("  unsigned int replacement\;\n");
print ("  unsigned int base\;\n");
print ("  unsigned int comb\;\n");
print ("\} precompositions\[\] \= \{\n");

$emit_rows->("precompose.SORT");

print ("\}\;\n");
print ("\n");

print ("static const struct \{\n");
print ("  unsigned int replacement\;\n");
print ("  unsigned int base\;\n");
print ("  unsigned int comb\;\n");
print ("\} decompositions\[\] \= \{\n");

$emit_rows->("decompose.SORT");

print ("\}\;\n");
print ("\n");



print ("static const struct \{\n");
print ("  unsigned int replacement_sp\;\n");
print ("  unsigned int base_sp\;\n");
print ("  unsigned int comb_sp\;\n");
print ("\} precompositions_sp\[\] \= \{\n");

$emit_rows->("precompose_sp.SORT");

print ("\}\;\n");
print ("\n");

print ("static const struct \{\n");
print ("  unsigned int replacement_sp\;\n");
print ("  unsigned int base_sp\;\n");
print ("  unsigned int comb_sp\;\n");
print ("\} decompositions_sp\[\] \= \{\n");

$emit_rows->("decompose_sp.SORT");

print ("\}\;\n");
print ("\n");

print ("\/\* EOF \*\/\n");
259
260# EOF
261