1/*
2	File:		DecompMakeData.c
3
	Contains:	Tool to generate tables for use by FixDecomps (CatalogCheck.c). It takes raw data on
				combining classes and decomposition changes, massages it into the trie form needed by
				the function, and emits it on stdout (which should be directed to a file DecompData.h).
6
	Copyright:	© 2002 by Apple Computer, Inc., all rights reserved.
8
9	CVS change log:
10
11		$Log: DecompMakeData.c,v $
12		Revision 1.2  2002/12/20 01:20:36  lindak
13		Merged PR-2937515-2 into ZZ100
14		Old HFS+ decompositions need to be repaired
15
16		Revision 1.1.4.1  2002/12/16 18:55:22  jcotting
17		integrated code from text group (Peter Edberg) that will correct some
18		illegal names created with obsolete Unicode 2.1.2 decomposition rules
19		Bug #: 2937515
20		Submitted by: jerry cottingham
21		Reviewed by: don brady
22
23		Revision 1.1.2.1  2002/10/25 17:15:22  jcotting
24		added code from Peter Edberg that will detect and offer replacement
25		names for file system object names with pre-Jaguar decomp errors
26		Bug #: 2937515
27		Submitted by: jerry cottingham
28		Reviewed by: don brady
29
30		Revision 1.1  2002/10/16 06:33:26  pedberg
31		Initial working version of function and related tools and tables
32
33
34	Notes:
35
36	1. To build:
37	cc DecompMakeData.c -o DecompMakeData -g
38
39	2. To use:
40	./DecompMakeData > DecompData.h
41
42*/
43
44#include <stddef.h>
45#include <stdio.h>
46
47// Internal includes
48#include "DecompDataEnums.h"	// enums for data tables
49
// One row of the raw input table below. The table is terminated by a row whose
// uChar is 0 (that is the condition main() uses to stop reading rows).
typedef struct UniCharClassAndRepl {
	u_int16_t	uChar;					// current char to match
	u_int16_t	combClass;				// combining class of uChar; 0 is never copied into the class table
	u_int16_t	action;					// replacement action code; 0 means no replacement for this char
	u_int16_t	matchAndReplacement[3];	// next chars that must also match (if any), then the replacement string
} UniCharClassAndRepl;
57
58// The following is the raw data on
59// 1. Current combining classes, derived from the Unicode 3.2.0 data file
60// 2. Changes in decomposition sequences, derived by comparing the canonical decompositions derived from
61// the Unicode 2.1.2 data file with the decompositions derived from the Unicode 3.2.0 data file (in both
62// cases excluding decompositions in the ranges 2000-2FFF, F900-FAFF, etc.).
63// These are folded into a single table so we can do one lookup of the high-order 12 bits of the shifted
64// UniChar to determine if there is anything of interest.
65//
66// Note that these ignore non-BMP characters; the new decompositions and combining classes for those are
67// not really relevant for the purpose of fixing the HFS+ filenames.
68
// Raw input rows consumed by main(). Each row gives a character, its combining class, and an
// optional replacement action with its match/replacement characters. Rows whose action is 0
// contribute only a combining class. The rows are listed in ascending uChar order and the table
// ends with an all-zero row; main() stops at the first row whose uChar is 0.
static const UniCharClassAndRepl uCharClassAndRepl[] = {
//	cur char	comb	replacement							next chars that		replacement string
//	to match	class	action								must also match		for cur or all
//	--------	-----	----------------------------		---------------		---------------------
	{ 0x00A8,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x00A8, 0x0301			}	},
	{ 0x01F8,	  0,	kReplaceCurWithTwo,					{					0x004E, 0x0300			}	},
	{ 0x01F9,	  0,	kReplaceCurWithTwo,					{					0x006E, 0x0300			}	},
	{ 0x0218,	  0,	kReplaceCurWithTwo,					{					0x0053, 0x0326			}	},
	{ 0x0219,	  0,	kReplaceCurWithTwo,					{					0x0073, 0x0326			}	},
	{ 0x021A,	  0,	kReplaceCurWithTwo,					{					0x0054, 0x0326			}	},
	{ 0x021B,	  0,	kReplaceCurWithTwo,					{					0x0074, 0x0326			}	},
	{ 0x021E,	  0,	kReplaceCurWithTwo,					{					0x0048, 0x030C			}	},
	{ 0x021F,	  0,	kReplaceCurWithTwo,					{					0x0068, 0x030C			}	},
	{ 0x0226,	  0,	kReplaceCurWithTwo,					{					0x0041, 0x0307			}	},
	{ 0x0227,	  0,	kReplaceCurWithTwo,					{					0x0061, 0x0307			}	},
	{ 0x0228,	  0,	kReplaceCurWithTwo,					{					0x0045, 0x0327			}	},
	{ 0x0229,	  0,	kReplaceCurWithTwo,					{					0x0065, 0x0327			}	},
	{ 0x022A,	  0,	kReplaceCurWithThree,				{					0x004F, 0x0308, 0x0304	}	},
	{ 0x022B,	  0,	kReplaceCurWithThree,				{					0x006F, 0x0308, 0x0304	}	},
	{ 0x022C,	  0,	kReplaceCurWithThree,				{					0x004F, 0x0303, 0x0304	}	},
	{ 0x022D,	  0,	kReplaceCurWithThree,				{					0x006F, 0x0303, 0x0304	}	},
	{ 0x022E,	  0,	kReplaceCurWithTwo,					{					0x004F, 0x0307			}	},
	{ 0x022F,	  0,	kReplaceCurWithTwo,					{					0x006F, 0x0307			}	},
	{ 0x0230,	  0,	kReplaceCurWithThree,				{					0x004F, 0x0307, 0x0304	}	},
	{ 0x0231,	  0,	kReplaceCurWithThree,				{					0x006F, 0x0307, 0x0304	}	},
	{ 0x0232,	  0,	kReplaceCurWithTwo,					{					0x0059, 0x0304			}	},
	{ 0x0233,	  0,	kReplaceCurWithTwo,					{					0x0079, 0x0304			}	},
	{ 0x0300,	230,	0,	{ 0	}	},
	{ 0x0301,	230,	0,	{ 0	}	},
	{ 0x0302,	230,	0,	{ 0	}	},
	{ 0x0303,	230,	0,	{ 0	}	},
	{ 0x0304,	230,	0,	{ 0	}	},
	{ 0x0305,	230,	0,	{ 0	}	},
	{ 0x0306,	230,	kIfNextOneMatchesReplaceAllWithOne,	{ 0x0307,			0x0310					}	},
	{ 0x0307,	230,	0,	{ 0	}	},
	{ 0x0308,	230,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x0308, 0x0301			}	},
	{ 0x0309,	230,	0,	{ 0	}	},
	{ 0x030A,	230,	0,	{ 0	}	},
	{ 0x030B,	230,	0,	{ 0	}	},
	{ 0x030C,	230,	0,	{ 0	}	},
	{ 0x030D,	230,	0,	{ 0	}	},
	{ 0x030E,	230,	0,	{ 0	}	},
	{ 0x030F,	230,	0,	{ 0	}	},
	{ 0x0310,	230,	0,	{ 0	}	},
	{ 0x0311,	230,	0,	{ 0	}	},
	{ 0x0312,	230,	0,	{ 0	}	},
	{ 0x0313,	230,	0,	{ 0	}	},
	{ 0x0314,	230,	0,	{ 0	}	},
	{ 0x0315,	232,	0,	{ 0	}	},
	{ 0x0316,	220,	0,	{ 0	}	},
	{ 0x0317,	220,	0,	{ 0	}	},
	{ 0x0318,	220,	0,	{ 0	}	},
	{ 0x0319,	220,	0,	{ 0	}	},
	{ 0x031A,	232,	0,	{ 0	}	},
	{ 0x031B,	216,	0,	{ 0	}	},
	{ 0x031C,	220,	0,	{ 0	}	},
	{ 0x031D,	220,	0,	{ 0	}	},
	{ 0x031E,	220,	0,	{ 0	}	},
	{ 0x031F,	220,	0,	{ 0	}	},
	{ 0x0320,	220,	0,	{ 0	}	},
	{ 0x0321,	202,	0,	{ 0	}	},
	{ 0x0322,	202,	0,	{ 0	}	},
	{ 0x0323,	220,	0,	{ 0	}	},
	{ 0x0324,	220,	0,	{ 0	}	},
	{ 0x0325,	220,	0,	{ 0	}	},
	{ 0x0326,	220,	0,	{ 0	}	},
	{ 0x0327,	202,	0,	{ 0	}	},
	{ 0x0328,	202,	0,	{ 0	}	},
	{ 0x0329,	220,	0,	{ 0	}	},
	{ 0x032A,	220,	0,	{ 0	}	},
	{ 0x032B,	220,	0,	{ 0	}	},
	{ 0x032C,	220,	0,	{ 0	}	},
	{ 0x032D,	220,	0,	{ 0	}	},
	{ 0x032E,	220,	0,	{ 0	}	},
	{ 0x032F,	220,	0,	{ 0	}	},
	{ 0x0330,	220,	0,	{ 0	}	},
	{ 0x0331,	220,	0,	{ 0	}	},
	{ 0x0332,	220,	0,	{ 0	}	},
	{ 0x0333,	220,	0,	{ 0	}	},
	{ 0x0334,	  1,	0,	{ 0	}	},
	{ 0x0335,	  1,	0,	{ 0	}	},
	{ 0x0336,	  1,	0,	{ 0	}	},
	{ 0x0337,	  1,	0,	{ 0	}	},
	{ 0x0338,	  1,	0,	{ 0	}	},
	{ 0x0339,	220,	0,	{ 0	}	},
	{ 0x033A,	220,	0,	{ 0	}	},
	{ 0x033B,	220,	0,	{ 0	}	},
	{ 0x033C,	220,	0,	{ 0	}	},
	{ 0x033D,	230,	0,	{ 0	}	},
	{ 0x033E,	230,	0,	{ 0	}	},
	{ 0x033F,	230,	0,	{ 0	}	},
	{ 0x0340,	230,	0,	{ 0	}	},
	{ 0x0341,	230,	0,	{ 0	}	},
	{ 0x0342,	230,	0,	{ 0	}	},
	{ 0x0343,	230,	0,	{ 0	}	},
	{ 0x0344,	230,	0,	{ 0	}	},
	{ 0x0345,	240,	0,	{ 0	}	},
	{ 0x0346,	230,	0,	{ 0	}	},
	{ 0x0347,	220,	0,	{ 0	}	},
	{ 0x0348,	220,	0,	{ 0	}	},
	{ 0x0349,	220,	0,	{ 0	}	},
	{ 0x034A,	230,	0,	{ 0	}	},
	{ 0x034B,	230,	0,	{ 0	}	},
	{ 0x034C,	230,	0,	{ 0	}	},
	{ 0x034D,	220,	0,	{ 0	}	},
	{ 0x034E,	220,	0,	{ 0	}	},
	{ 0x0360,	234,	0,	{ 0	}	},
	{ 0x0361,	234,	0,	{ 0	}	},
	{ 0x0362,	233,	0,	{ 0	}	},
	{ 0x0363,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x0364,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x0365,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x0366,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x0367,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x0368,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x0369,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x036A,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x036B,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x036C,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x036D,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x036E,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x036F,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x0391,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x0391, 0x0301			}	},
	{ 0x0395,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x0395, 0x0301			}	},
	{ 0x0397,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x0397, 0x0301			}	},
	{ 0x0399,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x0399, 0x0301			}	},
	{ 0x039F,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x039F, 0x0301			}	},
	{ 0x03A5,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03A5, 0x0301			}	},
	{ 0x03A9,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03A9, 0x0301			}	},
	{ 0x03B1,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03B1, 0x0301			}	},
	{ 0x03B5,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03B5, 0x0301			}	},
	{ 0x03B7,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03B7, 0x0301			}	},
	{ 0x03B9,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03B9, 0x0301			}	},
	{ 0x03BF,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03BF, 0x0301			}	},
	{ 0x03C5,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03C5, 0x0301			}	},
	{ 0x03C9,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03C9, 0x0301			}	},
	{ 0x03D2,	  0,	kIfNextOneMatchesReplaceAllWithTwo,	{ 0x030D,			0x03D2, 0x0301			}	},
	{ 0x0400,	  0,	kReplaceCurWithTwo,					{					0x0415, 0x0300			}	},
	{ 0x040D,	  0,	kReplaceCurWithTwo,					{					0x0418, 0x0300			}	},
	{ 0x0450,	  0,	kReplaceCurWithTwo,					{					0x0435, 0x0300			}	},
	{ 0x045D,	  0,	kReplaceCurWithTwo,					{					0x0438, 0x0300			}	},
	{ 0x0483,	230,	0,	{ 0	}	},
	{ 0x0484,	230,	0,	{ 0	}	},
	{ 0x0485,	230,	0,	{ 0	}	},
	{ 0x0486,	230,	0,	{ 0	}	},
	{ 0x04EC,	  0,	kReplaceCurWithTwo,					{					0x042D, 0x0308			}	},
	{ 0x04ED,	  0,	kReplaceCurWithTwo,					{					0x044D, 0x0308			}	},
	{ 0x0591,	220,	0,	{ 0	}	},
	{ 0x0592,	230,	0,	{ 0	}	},
	{ 0x0593,	230,	0,	{ 0	}	},
	{ 0x0594,	230,	0,	{ 0	}	},
	{ 0x0595,	230,	0,	{ 0	}	},
	{ 0x0596,	220,	0,	{ 0	}	},
	{ 0x0597,	230,	0,	{ 0	}	},
	{ 0x0598,	230,	0,	{ 0	}	},
	{ 0x0599,	230,	0,	{ 0	}	},
	{ 0x059A,	222,	0,	{ 0	}	},
	{ 0x059B,	220,	0,	{ 0	}	},
	{ 0x059C,	230,	0,	{ 0	}	},
	{ 0x059D,	230,	0,	{ 0	}	},
	{ 0x059E,	230,	0,	{ 0	}	},
	{ 0x059F,	230,	0,	{ 0	}	},
	{ 0x05A0,	230,	0,	{ 0	}	},
	{ 0x05A1,	230,	0,	{ 0	}	},
	{ 0x05A3,	220,	0,	{ 0	}	},
	{ 0x05A4,	220,	0,	{ 0	}	},
	{ 0x05A5,	220,	0,	{ 0	}	},
	{ 0x05A6,	220,	0,	{ 0	}	},
	{ 0x05A7,	220,	0,	{ 0	}	},
	{ 0x05A8,	230,	0,	{ 0	}	},
	{ 0x05A9,	230,	0,	{ 0	}	},
	{ 0x05AA,	220,	0,	{ 0	}	},
	{ 0x05AB,	230,	0,	{ 0	}	},
	{ 0x05AC,	230,	0,	{ 0	}	},
	{ 0x05AD,	222,	0,	{ 0	}	},
	{ 0x05AE,	228,	0,	{ 0	}	},
	{ 0x05AF,	230,	0,	{ 0	}	},
	{ 0x05B0,	 10,	0,	{ 0	}	},
	{ 0x05B1,	 11,	0,	{ 0	}	},
	{ 0x05B2,	 12,	0,	{ 0	}	},
	{ 0x05B3,	 13,	0,	{ 0	}	},
	{ 0x05B4,	 14,	0,	{ 0	}	},
	{ 0x05B5,	 15,	0,	{ 0	}	},
	{ 0x05B6,	 16,	0,	{ 0	}	},
	{ 0x05B7,	 17,	0,	{ 0	}	},
	{ 0x05B8,	 18,	0,	{ 0	}	},
	{ 0x05B9,	 19,	0,	{ 0	}	},
	{ 0x05BB,	 20,	0,	{ 0	}	},
	{ 0x05BC,	 21,	0,	{ 0	}	},
	{ 0x05BD,	 22,	0,	{ 0	}	},
	{ 0x05BF,	 23,	0,	{ 0	}	},
	{ 0x05C1,	 24,	0,	{ 0	}	},
	{ 0x05C2,	 25,	0,	{ 0	}	},
	{ 0x05C4,	230,	0,	{ 0	}	},
	{ 0x0622,	  0,	kReplaceCurWithTwo,					{					0x0627, 0x0653			}	},
	{ 0x0623,	  0,	kReplaceCurWithTwo,					{					0x0627, 0x0654			}	},
	{ 0x0624,	  0,	kReplaceCurWithTwo,					{					0x0648, 0x0654			}	},
	{ 0x0625,	  0,	kReplaceCurWithTwo,					{					0x0627, 0x0655			}	},
	{ 0x0626,	  0,	kReplaceCurWithTwo,					{					0x064A, 0x0654			}	},
	{ 0x064B,	 27,	0,	{ 0	}	},
	{ 0x064C,	 28,	0,	{ 0	}	},
	{ 0x064D,	 29,	0,	{ 0	}	},
	{ 0x064E,	 30,	0,	{ 0	}	},
	{ 0x064F,	 31,	0,	{ 0	}	},
	{ 0x0650,	 32,	0,	{ 0	}	},
	{ 0x0651,	 33,	0,	{ 0	}	},
	{ 0x0652,	 34,	0,	{ 0	}	},
	{ 0x0653,	230,	0,	{ 0	}	},
	{ 0x0654,	230,	0,	{ 0	}	},
	{ 0x0655,	220,	0,	{ 0	}	},
	{ 0x0670,	 35,	0,	{ 0	}	},
	{ 0x06C0,	  0,	kReplaceCurWithTwo,					{					0x06D5, 0x0654			}	},
	{ 0x06C2,	  0,	kReplaceCurWithTwo,					{					0x06C1, 0x0654			}	},
	{ 0x06D3,	  0,	kReplaceCurWithTwo,					{					0x06D2, 0x0654			}	},
	{ 0x06D6,	230,	0,	{ 0	}	},
	{ 0x06D7,	230,	0,	{ 0	}	},
	{ 0x06D8,	230,	0,	{ 0	}	},
	{ 0x06D9,	230,	0,	{ 0	}	},
	{ 0x06DA,	230,	0,	{ 0	}	},
	{ 0x06DB,	230,	0,	{ 0	}	},
	{ 0x06DC,	230,	0,	{ 0	}	},
	{ 0x06DF,	230,	0,	{ 0	}	},
	{ 0x06E0,	230,	0,	{ 0	}	},
	{ 0x06E1,	230,	0,	{ 0	}	},
	{ 0x06E2,	230,	0,	{ 0	}	},
	{ 0x06E3,	220,	0,	{ 0	}	},
	{ 0x06E4,	230,	0,	{ 0	}	},
	{ 0x06E7,	230,	0,	{ 0	}	},
	{ 0x06E8,	230,	0,	{ 0	}	},
	{ 0x06EA,	220,	0,	{ 0	}	},
	{ 0x06EB,	230,	0,	{ 0	}	},
	{ 0x06EC,	230,	0,	{ 0	}	},
	{ 0x06ED,	220,	0,	{ 0	}	},
	{ 0x0711,	 36,	0,	{ 0	}	},
	{ 0x0730,	230,	0,	{ 0	}	},
	{ 0x0731,	220,	0,	{ 0	}	},
	{ 0x0732,	230,	0,	{ 0	}	},
	{ 0x0733,	230,	0,	{ 0	}	},
	{ 0x0734,	220,	0,	{ 0	}	},
	{ 0x0735,	230,	0,	{ 0	}	},
	{ 0x0736,	230,	0,	{ 0	}	},
	{ 0x0737,	220,	0,	{ 0	}	},
	{ 0x0738,	220,	0,	{ 0	}	},
	{ 0x0739,	220,	0,	{ 0	}	},
	{ 0x073A,	230,	0,	{ 0	}	},
	{ 0x073B,	220,	0,	{ 0	}	},
	{ 0x073C,	220,	0,	{ 0	}	},
	{ 0x073D,	230,	0,	{ 0	}	},
	{ 0x073E,	220,	0,	{ 0	}	},
	{ 0x073F,	230,	0,	{ 0	}	},
	{ 0x0740,	230,	0,	{ 0	}	},
	{ 0x0741,	230,	0,	{ 0	}	},
	{ 0x0742,	220,	0,	{ 0	}	},
	{ 0x0743,	230,	0,	{ 0	}	},
	{ 0x0744,	220,	0,	{ 0	}	},
	{ 0x0745,	230,	0,	{ 0	}	},
	{ 0x0746,	220,	0,	{ 0	}	},
	{ 0x0747,	230,	0,	{ 0	}	},
	{ 0x0748,	220,	0,	{ 0	}	},
	{ 0x0749,	230,	0,	{ 0	}	},
	{ 0x074A,	230,	0,	{ 0	}	},
	{ 0x093C,	  7,	0,	{ 0	}	},
	{ 0x094D,	  9,	0,	{ 0	}	},
	{ 0x0951,	230,	0,	{ 0	}	},
	{ 0x0952,	220,	0,	{ 0	}	},
	{ 0x0953,	230,	0,	{ 0	}	},
	{ 0x0954,	230,	0,	{ 0	}	},
	{ 0x09AC,	  0,	kIfNextOneMatchesReplaceAllWithOne,	{ 0x09BC,			0x09B0					}	},
	{ 0x09BC,	  7,	0,	{ 0	}	},
	{ 0x09CD,	  9,	0,	{ 0	}	},
	{ 0x0A21,	  0,	kIfNextOneMatchesReplaceAllWithOne,	{ 0x0A3C,			0x0A5C					}	},
	{ 0x0A33,	  0,	kReplaceCurWithTwo,					{					0x0A32, 0x0A3C			}	},
	{ 0x0A36,	  0,	kReplaceCurWithTwo,					{					0x0A38, 0x0A3C			}	},
	{ 0x0A3C,	  7,	0,	{ 0	}	},
	{ 0x0A4D,	  9,	0,	{ 0	}	},
	{ 0x0ABC,	  7,	0,	{ 0	}	},
	{ 0x0ACD,	  9,	0,	{ 0	}	},
	{ 0x0B2F,	  0,	kIfNextOneMatchesReplaceAllWithOne,	{ 0x0B3C,			0x0B5F					}	},
	{ 0x0B3C,	  7,	0,	{ 0	}	},
	{ 0x0B4D,	  9,	0,	{ 0	}	},
	{ 0x0BCD,	  9,	0,	{ 0	}	},
	{ 0x0C4D,	  9,	0,	{ 0	}	},
	{ 0x0C55,	 84,	0,	{ 0	}	},
	{ 0x0C56,	 91,	0,	{ 0	}	},
	{ 0x0CCD,	  9,	0,	{ 0	}	},
	{ 0x0D4D,	  9,	0,	{ 0	}	},
	{ 0x0DCA,	  9,	0,	{ 0	}	},
	{ 0x0DDA,	  0,	kReplaceCurWithTwo,					{					0x0DD9, 0x0DCA			}	},
	{ 0x0DDC,	  0,	kReplaceCurWithTwo,					{					0x0DD9, 0x0DCF			}	},
	{ 0x0DDD,	  0,	kReplaceCurWithThree,				{					0x0DD9, 0x0DCF, 0x0DCA	}	},
	{ 0x0DDE,	  0,	kReplaceCurWithTwo,					{					0x0DD9, 0x0DDF			}	},
	{ 0x0E38,	103,	0,	{ 0	}	},
	{ 0x0E39,	103,	0,	{ 0	}	},
	{ 0x0E3A,	  9,	0,	{ 0	}	},
	{ 0x0E48,	107,	0,	{ 0	}	},
	{ 0x0E49,	107,	0,	{ 0	}	},
	{ 0x0E4A,	107,	0,	{ 0	}	},
	{ 0x0E4B,	107,	0,	{ 0	}	},
	{ 0x0E4D,	  0,	kIfNextOneMatchesReplaceAllWithOne,	{ 0x0E32,			0x0E33					}	},
	{ 0x0EB8,	118,	0,	{ 0	}	},
	{ 0x0EB9,	118,	0,	{ 0	}	},
	{ 0x0EC8,	122,	0,	{ 0	}	},
	{ 0x0EC9,	122,	0,	{ 0	}	},
	{ 0x0ECA,	122,	0,	{ 0	}	},
	{ 0x0ECB,	122,	0,	{ 0	}	},
	{ 0x0ECD,	  0,	kIfNextOneMatchesReplaceAllWithOne,	{ 0x0EB2,			0x0EB3					}	},
	{ 0x0F18,	220,	0,	{ 0	}	},
	{ 0x0F19,	220,	0,	{ 0	}	},
	{ 0x0F35,	220,	0,	{ 0	}	},
	{ 0x0F37,	220,	0,	{ 0	}	},
	{ 0x0F39,	216,	0,	{ 0	}	},
	{ 0x0F71,	129,	0,	{ 0	}	},
	{ 0x0F72,	130,	0,	{ 0	}	},
	{ 0x0F74,	132,	0,	{ 0	}	},
	{ 0x0F7A,	130,	0,	{ 0	}	},
	{ 0x0F7B,	130,	0,	{ 0	}	},
	{ 0x0F7C,	130,	0,	{ 0	}	},
	{ 0x0F7D,	130,	0,	{ 0	}	},
	{ 0x0F80,	130,	0,	{ 0	}	},
	{ 0x0F82,	230,	0,	{ 0	}	},
	{ 0x0F83,	230,	0,	{ 0	}	},
	{ 0x0F84,	  9,	0,	{ 0	}	},
	{ 0x0F86,	230,	0,	{ 0	}	},
	{ 0x0F87,	230,	0,	{ 0	}	},
	{ 0x0FB2,	  0,	kIfNextTwoMatchReplaceAllWithOne,	{ 0x0F80, 0x0F71,	0x0F77					}	},
	{ 0x0FB3,	  0,	kIfNextTwoMatchReplaceAllWithOne,	{ 0x0F80, 0x0F71,	0x0F79					}	},
	{ 0x0FC6,	220,	0,	{ 0	}	},
	{ 0x1026,	  0,	kReplaceCurWithTwo,					{					0x1025, 0x102E			}	},
	{ 0x1037,	  7,	0,	{ 0	}	},
	{ 0x1039,	  9,	0,	{ 0	}	},
	{ 0x1714,	  9,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x1734,	  9,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x17D2,	  9,	0,	{ 0	}	},
	{ 0x18A9,	228,	0,	{ 0	}	},
	{ 0x20D0,	230,	0,	{ 0	}	},
	{ 0x20D1,	230,	0,	{ 0	}	},
	{ 0x20D2,	  1,	0,	{ 0	}	},
	{ 0x20D3,	  1,	0,	{ 0	}	},
	{ 0x20D4,	230,	0,	{ 0	}	},
	{ 0x20D5,	230,	0,	{ 0	}	},
	{ 0x20D6,	230,	0,	{ 0	}	},
	{ 0x20D7,	230,	0,	{ 0	}	},
	{ 0x20D8,	  1,	0,	{ 0	}	},
	{ 0x20D9,	  1,	0,	{ 0	}	},
	{ 0x20DA,	  1,	0,	{ 0	}	},
	{ 0x20DB,	230,	0,	{ 0	}	},
	{ 0x20DC,	230,	0,	{ 0	}	},
	{ 0x20E1,	230,	0,	{ 0	}	},
	{ 0x20E5,	  1,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x20E6,	  1,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x20E7,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x20E8,	220,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x20E9,	230,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x20EA,	  1,	0,	{ 0	}	},	// new char in Unicode 3.2
	{ 0x302A,	218,	0,	{ 0	}	},
	{ 0x302B,	228,	0,	{ 0	}	},
	{ 0x302C,	232,	0,	{ 0	}	},
	{ 0x302D,	222,	0,	{ 0	}	},
	{ 0x302E,	224,	0,	{ 0	}	},
	{ 0x302F,	224,	0,	{ 0	}	},
	{ 0x3099,	  8,	0,	{ 0	}	},
	{ 0x309A,	  8,	0,	{ 0	}	},
	{ 0xFB1D,	  0,	kReplaceCurWithTwo,					{					0x05D9, 0x05B4			}	},
	{ 0xFB1E,	 26,	0,	{ 0	}	},
	{ 0xFE20,	230,	0,	{ 0	}	},
	{ 0xFE21,	230,	0,	{ 0	}	},
	{ 0xFE22,	230,	0,	{ 0	}	},
	{ 0xFE23,	230,	0,	{ 0	}	},
	{ 0,		  0,	0,	{ 0	}	}	// terminator: uChar == 0 ends the table
};
439
// Capacity limits for the working tables, and layout settings for the emitted text.
enum {
	// Number of rows available in classRanges[], replRanges[] and rangesKey[];
	// main() fails once more ranges than this would be needed.
	kMaxRangeCount = 108,

	// Number of u_int16_t slots in replacementData[].
	kMaxReplaceDataCount = 256,

	// How many index values main() prints per output line.
	kIndexValuesPerLine = 16,

	// How many replacement-data values main() prints per output line.
	kReplDataValuesPerLine = 8
};
446
// Working tables that main() fills in from uCharClassAndRepl[] and then prints.
//
// rangesIndex[] is indexed by the high field of the shifted UniChar; main() initializes every
// entry to -1 and replaces it with a range number when the first character in that range is seen.
static int8_t	rangesIndex[kHiFieldEntryCount];					// if >= 0, then index into xxxRanges[]
// Per range, one slot per low-field value: the combining class (left 0 when the class is 0).
static u_int8_t	classRanges[kMaxRangeCount][kLoFieldEntryCount];
// Per range, one slot per low-field value: index into replacementData[] of the character's
// action entry, or 0 for none (real replacement data starts at index 1).
static u_int8_t	replRanges[kMaxRangeCount][kLoFieldEntryCount];
static u_int16_t rangesKey[kMaxRangeCount];							// remembers starting Unicode for range
// Flat list of replacement entries: an action code followed by that action's match and
// replacement characters.
static u_int16_t replacementData[kMaxReplaceDataCount];
452
453int main(int argc, char *argv[]) {
454	u_int32_t						entryIndex, rangeIndex;
455	const UniCharClassAndRepl *		classAndReplPtr;
456	int32_t							rangeCount;
457	u_int32_t						replDataCount;
458
459	// print header stuff
460	plog("/*\n");
461	plog("\tFile:\t\tDecompData.h\n");
462	plog("\tContains:\tData tables for use in FixDecomps (CatalogCheck.c)\n");
463	plog("\tNote:\t\tThis file is generated automatically by running DecompMakeData\n");
464	plog("*/\n");
465	plog("#include \"DecompDataEnums.h\"\n\n");
466
467	// initialize arrays
468	for (entryIndex = 0; entryIndex < kHiFieldEntryCount; entryIndex++) {
469		rangesIndex[entryIndex] = -1;
470	}
471	for (rangeIndex = 0; rangeIndex < kMaxRangeCount; rangeIndex++) {
472		for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) {
473			classRanges[rangeIndex][entryIndex] = 0;
474			replRanges[rangeIndex][entryIndex] = 0;
475		}
476	}
477	rangeCount = 0;
478	replDataCount = 0;
479	replacementData[replDataCount++] = 0;	// need to start real data at index 1
480
481	// process data
482	for (classAndReplPtr = uCharClassAndRepl; classAndReplPtr->uChar != 0; classAndReplPtr++) {
483		u_int32_t	matchAndReplacementCount, matchAndReplacementIndex;
484		u_int16_t	shiftUChar = classAndReplPtr->uChar + kShiftUniCharOffset;
485		if (shiftUChar >= kShiftUniCharLimit) {
486			plog("Exceeded uChar range for 0x%04X\n", classAndReplPtr->uChar);
487			return 1;
488		}
489		entryIndex = shiftUChar >> kLoFieldBitSize;
490		if (rangesIndex[entryIndex] == -1) {
491			if (rangeCount >= kMaxRangeCount) {
492				plog("Exceeded max range count with 0x%04X\n", classAndReplPtr->uChar);
493				return 1;
494			}
495			rangesKey[rangeCount] = classAndReplPtr->uChar & ~kLoFieldMask;
496			rangesIndex[entryIndex] = rangeCount++;
497		}
498		entryIndex = shiftUChar & kLoFieldMask;
499
500		if (classAndReplPtr->combClass != 0)
501			classRanges[rangeCount - 1][entryIndex] = classAndReplPtr->combClass;
502
503		if (classAndReplPtr->action != 0) {
504			switch (classAndReplPtr->action) {
505				case kReplaceCurWithTwo:
506				case kIfNextOneMatchesReplaceAllWithOne:
507					matchAndReplacementCount = 2;
508					break;
509				case kReplaceCurWithThree:
510				case kIfNextOneMatchesReplaceAllWithTwo:
511				case kIfNextTwoMatchReplaceAllWithOne:
512					matchAndReplacementCount = 3;
513					break;
514				default:
515					matchAndReplacementCount = 0;
516					break;
517			}
518			if (replDataCount + matchAndReplacementCount >= kMaxReplaceDataCount) {
519				plog("Exceeded max replacement data count with 0x%04X\n", classAndReplPtr->uChar);
520				return 1;
521			}
522			replRanges[rangeCount - 1][entryIndex] = replDataCount;
523			replacementData[replDataCount++] = classAndReplPtr->action;
524			for (matchAndReplacementIndex = 0; matchAndReplacementIndex < matchAndReplacementCount; matchAndReplacementIndex++) {
525				replacementData[replDataCount++] = classAndReplPtr->matchAndReplacement[matchAndReplacementIndex];
526			}
527		}
528	}
529
530	// print filled-in index
531	plog("static const int8_t classAndReplIndex[kHiFieldEntryCount] = {\n");
532	for (entryIndex = 0; entryIndex < kHiFieldEntryCount; entryIndex++) {
533		char *	formatPtr = (entryIndex + 1 < kHiFieldEntryCount)? "%2d,\t": "%2d\t";
534		if (entryIndex % kIndexValuesPerLine == 0)			// beginning of line,
535			plog("\t");								//  print tab
536		plog(formatPtr, rangesIndex[entryIndex]);		// print values
537		if ((entryIndex + 1) % kIndexValuesPerLine == 0)		// end of line, print starting UniChar value
538			plog("// uChar 0x%04X-\n", (u_int16_t)(((entryIndex + 1 - kIndexValuesPerLine) << kLoFieldBitSize) - kShiftUniCharOffset) );
539	}
540	plog("};\n\n");
541
542	// print filled in class ranges
543	plog("static const u_int8_t combClassRanges[][kLoFieldEntryCount] = {\n", kLoFieldEntryCount);
544	for (rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++) {
545		plog("\t{\t");
546		for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) {
547			char *	formatPtr = (entryIndex + 1 < kLoFieldEntryCount)? "%3d,": "%3d";
548			plog(formatPtr, classRanges[rangeIndex][entryIndex]);	// print values
549		}
550		plog("\t},\t// uChar 0x%04X-\n", rangesKey[rangeIndex]);
551	}
552	plog("};\n\n");
553
554	// print filled in repl ranges
555	plog("static const u_int8_t replaceRanges[][kLoFieldEntryCount] = {\n", kLoFieldEntryCount);
556	for (rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++) {
557		plog("\t{\t");
558		for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) {
559			char *	formatPtr = (entryIndex + 1 < kLoFieldEntryCount)? "%3d,": "%3d";
560			plog(formatPtr, replRanges[rangeIndex][entryIndex]);	// print values
561		}
562		plog("\t},\t// uChar 0x%04X-\n", rangesKey[rangeIndex]);
563	}
564	plog("};\n\n");
565
566	// print filled in replacement data
567	plog("static const u_int16_t replaceData[] = {\n");
568	for (entryIndex = 0; entryIndex < replDataCount; entryIndex++) {
569		char *	formatPtr = (entryIndex + 1 < replDataCount)? "0x%04X,\t": "0x%04X\t";
570		if (entryIndex % kReplDataValuesPerLine == 0)			// beginning of line,
571			plog("\t");										//  print tab
572		plog(formatPtr, replacementData[entryIndex]);			// print values
573		if ((entryIndex + 1) % kReplDataValuesPerLine == 0 || entryIndex + 1 == replDataCount)	// end of line,
574			plog("// index %d-\n", entryIndex & ~(kReplDataValuesPerLine-1) );	// print starting index value
575	}
576	plog("};\n\n");
577
578	// print summary info
579	plog("// combClassData:\n");
580	plog("// trimmed index: kHiFieldEntryCount(= %d) bytes\n", kHiFieldEntryCount);
581	plog("// ranges: 2 * %d ranges * kLoFieldEntryCount(= %d) bytes = %d\n", rangeCount, kLoFieldEntryCount, 2*rangeCount*kLoFieldEntryCount);
582	plog("// replData: %d entries * 2 = %d\n", replDataCount, 2*replDataCount);
583	plog("// total: %d\n\n", kHiFieldEntryCount + 2*rangeCount*kLoFieldEntryCount + 2*replDataCount);
584
585	return 0;
586}
587