1/* 2 File: DecompMakeData.c 3 4 Contains: Tool to generate tables for use by FixDecomps (CatalogCheck.c). It takes raw data on combining classes and decomposition changes, massages it into the trie form needed by 5 the function, and emits it on stdout (which should be directed to a file DecompData.h). 6 7 Copyright: � 2002 by Apple Computer, Inc., all rights reserved. 8 9 CVS change log: 10 11 $Log: DecompMakeData.c,v $ 12 Revision 1.2 2002/12/20 01:20:36 lindak 13 Merged PR-2937515-2 into ZZ100 14 Old HFS+ decompositions need to be repaired 15 16 Revision 1.1.4.1 2002/12/16 18:55:22 jcotting 17 integrated code from text group (Peter Edberg) that will correct some 18 illegal names created with obsolete Unicode 2.1.2 decomposition rules 19 Bug #: 2937515 20 Submitted by: jerry cottingham 21 Reviewed by: don brady 22 23 Revision 1.1.2.1 2002/10/25 17:15:22 jcotting 24 added code from Peter Edberg that will detect and offer replacement 25 names for file system object names with pre-Jaguar decomp errors 26 Bug #: 2937515 27 Submitted by: jerry cottingham 28 Reviewed by: don brady 29 30 Revision 1.1 2002/10/16 06:33:26 pedberg 31 Initial working version of function and related tools and tables 32 33 34 Notes: 35 36 1. To build: 37 cc DecompMakeData.c -o DecompMakeData -g 38 39 2. To use: 40 ./DecompMakeData > DecompData.h 41 42*/ 43 44#include <stddef.h> 45#include <stdio.h> 46 47// Internal includes 48#include "DecompDataEnums.h" // enums for data tables 49 50struct UniCharClassAndRepl { 51 u_int16_t uChar; 52 u_int16_t combClass; 53 u_int16_t action; 54 u_int16_t matchAndReplacement[3]; 55}; 56typedef struct UniCharClassAndRepl UniCharClassAndRepl; 57 58// The following is the raw data on 59// 1. Current combining classes, derived from the Unicode 3.2.0 data file 60// 2. Changes in decomposition sequences, derived by comparing the canonical decompositions derived from 61// the Unicode 2.1.2 data file with the decompositions derived from the Unicode 3.2.0 data file (in both 62// cases excluding decompositions in the ranges 2000-2FFF, F900-FAFF, etc.). 63// These are folded into a single table so we can do one lookup of the high-order 12 bits of the shifted 64// UniChar to determine if there is anything of interest. 65// 66// Note that these ignore non-BMP characters; the new decompositions and combining classes for those are 67// not really relevant for the purpose of fixing the HFS+ filenames. 68 69static const UniCharClassAndRepl uCharClassAndRepl[] = { 70// cur char comb replacement next chars that replacement string 71// to match class action must also match for cur or all 72// -------- ----- ---------------------------- --------------- --------------------- 73 { 0x00A8, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x00A8, 0x0301 } }, 74 { 0x01F8, 0, kReplaceCurWithTwo, { 0x004E, 0x0300 } }, 75 { 0x01F9, 0, kReplaceCurWithTwo, { 0x006E, 0x0300 } }, 76 { 0x0218, 0, kReplaceCurWithTwo, { 0x0053, 0x0326 } }, 77 { 0x0219, 0, kReplaceCurWithTwo, { 0x0073, 0x0326 } }, 78 { 0x021A, 0, kReplaceCurWithTwo, { 0x0054, 0x0326 } }, 79 { 0x021B, 0, kReplaceCurWithTwo, { 0x0074, 0x0326 } }, 80 { 0x021E, 0, kReplaceCurWithTwo, { 0x0048, 0x030C } }, 81 { 0x021F, 0, kReplaceCurWithTwo, { 0x0068, 0x030C } }, 82 { 0x0226, 0, kReplaceCurWithTwo, { 0x0041, 0x0307 } }, 83 { 0x0227, 0, kReplaceCurWithTwo, { 0x0061, 0x0307 } }, 84 { 0x0228, 0, kReplaceCurWithTwo, { 0x0045, 0x0327 } }, 85 { 0x0229, 0, kReplaceCurWithTwo, { 0x0065, 0x0327 } }, 86 { 0x022A, 0, kReplaceCurWithThree, { 0x004F, 0x0308, 0x0304 } }, 87 { 0x022B, 0, kReplaceCurWithThree, { 0x006F, 0x0308, 0x0304 } }, 88 { 0x022C, 0, kReplaceCurWithThree, { 0x004F, 0x0303, 0x0304 } }, 89 { 0x022D, 0, kReplaceCurWithThree, { 0x006F, 0x0303, 0x0304 } }, 90 { 0x022E, 0, kReplaceCurWithTwo, { 0x004F, 0x0307 } }, 91 { 0x022F, 0, kReplaceCurWithTwo, { 0x006F, 0x0307 } }, 92 { 0x0230, 0, kReplaceCurWithThree, { 0x004F, 0x0307, 0x0304 } }, 93 { 0x0231, 0, kReplaceCurWithThree, { 0x006F, 0x0307, 0x0304 } }, 94 { 0x0232, 0, kReplaceCurWithTwo, { 0x0059, 0x0304 } }, 95 { 0x0233, 0, kReplaceCurWithTwo, { 0x0079, 0x0304 } }, 96 { 0x0300, 230, 0, { 0 } }, 97 { 0x0301, 230, 0, { 0 } }, 98 { 0x0302, 230, 0, { 0 } }, 99 { 0x0303, 230, 0, { 0 } }, 100 { 0x0304, 230, 0, { 0 } }, 101 { 0x0305, 230, 0, { 0 } }, 102 { 0x0306, 230, kIfNextOneMatchesReplaceAllWithOne, { 0x0307, 0x0310 } }, 103 { 0x0307, 230, 0, { 0 } }, 104 { 0x0308, 230, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0308, 0x0301 } }, 105 { 0x0309, 230, 0, { 0 } }, 106 { 0x030A, 230, 0, { 0 } }, 107 { 0x030B, 230, 0, { 0 } }, 108 { 0x030C, 230, 0, { 0 } }, 109 { 0x030D, 230, 0, { 0 } }, 110 { 0x030E, 230, 0, { 0 } }, 111 { 0x030F, 230, 0, { 0 } }, 112 { 0x0310, 230, 0, { 0 } }, 113 { 0x0311, 230, 0, { 0 } }, 114 { 0x0312, 230, 0, { 0 } }, 115 { 0x0313, 230, 0, { 0 } }, 116 { 0x0314, 230, 0, { 0 } }, 117 { 0x0315, 232, 0, { 0 } }, 118 { 0x0316, 220, 0, { 0 } }, 119 { 0x0317, 220, 0, { 0 } }, 120 { 0x0318, 220, 0, { 0 } }, 121 { 0x0319, 220, 0, { 0 } }, 122 { 0x031A, 232, 0, { 0 } }, 123 { 0x031B, 216, 0, { 0 } }, 124 { 0x031C, 220, 0, { 0 } }, 125 { 0x031D, 220, 0, { 0 } }, 126 { 0x031E, 220, 0, { 0 } }, 127 { 0x031F, 220, 0, { 0 } }, 128 { 0x0320, 220, 0, { 0 } }, 129 { 0x0321, 202, 0, { 0 } }, 130 { 0x0322, 202, 0, { 0 } }, 131 { 0x0323, 220, 0, { 0 } }, 132 { 0x0324, 220, 0, { 0 } }, 133 { 0x0325, 220, 0, { 0 } }, 134 { 0x0326, 220, 0, { 0 } }, 135 { 0x0327, 202, 0, { 0 } }, 136 { 0x0328, 202, 0, { 0 } }, 137 { 0x0329, 220, 0, { 0 } }, 138 { 0x032A, 220, 0, { 0 } }, 139 { 0x032B, 220, 0, { 0 } }, 140 { 0x032C, 220, 0, { 0 } }, 141 { 0x032D, 220, 0, { 0 } }, 142 { 0x032E, 220, 0, { 0 } }, 143 { 0x032F, 220, 0, { 0 } }, 144 { 0x0330, 220, 0, { 0 } }, 145 { 0x0331, 220, 0, { 0 } }, 146 { 0x0332, 220, 0, { 0 } }, 147 { 0x0333, 220, 0, { 0 } }, 148 { 0x0334, 1, 0, { 0 } }, 149 { 0x0335, 1, 0, { 0 } }, 150 { 0x0336, 1, 0, { 0 } }, 151 { 0x0337, 1, 0, { 0 } }, 152 { 0x0338, 1, 0, { 0 } }, 153 { 0x0339, 220, 0, { 0 } }, 154 { 0x033A, 220, 0, { 0 } }, 155 { 0x033B, 220, 0, { 0 } }, 156 { 0x033C, 220, 0, { 0 } }, 157 { 0x033D, 230, 0, { 0 } }, 158 { 0x033E, 230, 0, { 0 } }, 159 { 0x033F, 230, 0, { 0 } }, 160 { 0x0340, 230, 0, { 0 } }, 161 { 0x0341, 230, 0, { 0 } }, 162 { 0x0342, 230, 0, { 0 } }, 163 { 0x0343, 230, 0, { 0 } }, 164 { 0x0344, 230, 0, { 0 } }, 165 { 0x0345, 240, 0, { 0 } }, 166 { 0x0346, 230, 0, { 0 } }, 167 { 0x0347, 220, 0, { 0 } }, 168 { 0x0348, 220, 0, { 0 } }, 169 { 0x0349, 220, 0, { 0 } }, 170 { 0x034A, 230, 0, { 0 } }, 171 { 0x034B, 230, 0, { 0 } }, 172 { 0x034C, 230, 0, { 0 } }, 173 { 0x034D, 220, 0, { 0 } }, 174 { 0x034E, 220, 0, { 0 } }, 175 { 0x0360, 234, 0, { 0 } }, 176 { 0x0361, 234, 0, { 0 } }, 177 { 0x0362, 233, 0, { 0 } }, 178 { 0x0363, 230, 0, { 0 } }, // new char in Uncode 3.2 179 { 0x0364, 230, 0, { 0 } }, // new char in Uncode 3.2 180 { 0x0365, 230, 0, { 0 } }, // new char in Uncode 3.2 181 { 0x0366, 230, 0, { 0 } }, // new char in Uncode 3.2 182 { 0x0367, 230, 0, { 0 } }, // new char in Uncode 3.2 183 { 0x0368, 230, 0, { 0 } }, // new char in Uncode 3.2 184 { 0x0369, 230, 0, { 0 } }, // new char in Uncode 3.2 185 { 0x036A, 230, 0, { 0 } }, // new char in Uncode 3.2 186 { 0x036B, 230, 0, { 0 } }, // new char in Uncode 3.2 187 { 0x036C, 230, 0, { 0 } }, // new char in Uncode 3.2 188 { 0x036D, 230, 0, { 0 } }, // new char in Uncode 3.2 189 { 0x036E, 230, 0, { 0 } }, // new char in Uncode 3.2 190 { 0x036F, 230, 0, { 0 } }, // new char in Uncode 3.2 191 { 0x0391, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0391, 0x0301 } }, 192 { 0x0395, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0395, 0x0301 } }, 193 { 0x0397, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0397, 0x0301 } }, 194 { 0x0399, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x0399, 0x0301 } }, 195 { 0x039F, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x039F, 0x0301 } }, 196 { 0x03A5, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03A5, 0x0301 } }, 197 { 0x03A9, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03A9, 0x0301 } }, 198 { 0x03B1, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B1, 0x0301 } }, 199 { 0x03B5, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B5, 0x0301 } }, 200 { 0x03B7, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B7, 0x0301 } }, 201 { 0x03B9, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03B9, 0x0301 } }, 202 { 0x03BF, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03BF, 0x0301 } }, 203 { 0x03C5, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03C5, 0x0301 } }, 204 { 0x03C9, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03C9, 0x0301 } }, 205 { 0x03D2, 0, kIfNextOneMatchesReplaceAllWithTwo, { 0x030D, 0x03D2, 0x0301 } }, 206 { 0x0400, 0, kReplaceCurWithTwo, { 0x0415, 0x0300 } }, 207 { 0x040D, 0, kReplaceCurWithTwo, { 0x0418, 0x0300 } }, 208 { 0x0450, 0, kReplaceCurWithTwo, { 0x0435, 0x0300 } }, 209 { 0x045D, 0, kReplaceCurWithTwo, { 0x0438, 0x0300 } }, 210 { 0x0483, 230, 0, { 0 } }, 211 { 0x0484, 230, 0, { 0 } }, 212 { 0x0485, 230, 0, { 0 } }, 213 { 0x0486, 230, 0, { 0 } }, 214 { 0x04EC, 0, kReplaceCurWithTwo, { 0x042D, 0x0308 } }, 215 { 0x04ED, 0, kReplaceCurWithTwo, { 0x044D, 0x0308 } }, 216 { 0x0591, 220, 0, { 0 } }, 217 { 0x0592, 230, 0, { 0 } }, 218 { 0x0593, 230, 0, { 0 } }, 219 { 0x0594, 230, 0, { 0 } }, 220 { 0x0595, 230, 0, { 0 } }, 221 { 0x0596, 220, 0, { 0 } }, 222 { 0x0597, 230, 0, { 0 } }, 223 { 0x0598, 230, 0, { 0 } }, 224 { 0x0599, 230, 0, { 0 } }, 225 { 0x059A, 222, 0, { 0 } }, 226 { 0x059B, 220, 0, { 0 } }, 227 { 0x059C, 230, 0, { 0 } }, 228 { 0x059D, 230, 0, { 0 } }, 229 { 0x059E, 230, 0, { 0 } }, 230 { 0x059F, 230, 0, { 0 } }, 231 { 0x05A0, 230, 0, { 0 } }, 232 { 0x05A1, 230, 0, { 0 } }, 233 { 0x05A3, 220, 0, { 0 } }, 234 { 0x05A4, 220, 0, { 0 } }, 235 { 0x05A5, 220, 0, { 0 } }, 236 { 0x05A6, 220, 0, { 0 } }, 237 { 0x05A7, 220, 0, { 0 } }, 238 { 0x05A8, 230, 0, { 0 } }, 239 { 0x05A9, 230, 0, { 0 } }, 240 { 0x05AA, 220, 0, { 0 } }, 241 { 0x05AB, 230, 0, { 0 } }, 242 { 0x05AC, 230, 0, { 0 } }, 243 { 0x05AD, 222, 0, { 0 } }, 244 { 0x05AE, 228, 0, { 0 } }, 245 { 0x05AF, 230, 0, { 0 } }, 246 { 0x05B0, 10, 0, { 0 } }, 247 { 0x05B1, 11, 0, { 0 } }, 248 { 0x05B2, 12, 0, { 0 } }, 249 { 0x05B3, 13, 0, { 0 } }, 250 { 0x05B4, 14, 0, { 0 } }, 251 { 0x05B5, 15, 0, { 0 } }, 252 { 0x05B6, 16, 0, { 0 } }, 253 { 0x05B7, 17, 0, { 0 } }, 254 { 0x05B8, 18, 0, { 0 } }, 255 { 0x05B9, 19, 0, { 0 } }, 256 { 0x05BB, 20, 0, { 0 } }, 257 { 0x05BC, 21, 0, { 0 } }, 258 { 0x05BD, 22, 0, { 0 } }, 259 { 0x05BF, 23, 0, { 0 } }, 260 { 0x05C1, 24, 0, { 0 } }, 261 { 0x05C2, 25, 0, { 0 } }, 262 { 0x05C4, 230, 0, { 0 } }, 263 { 0x0622, 0, kReplaceCurWithTwo, { 0x0627, 0x0653 } }, 264 { 0x0623, 0, kReplaceCurWithTwo, { 0x0627, 0x0654 } }, 265 { 0x0624, 0, kReplaceCurWithTwo, { 0x0648, 0x0654 } }, 266 { 0x0625, 0, kReplaceCurWithTwo, { 0x0627, 0x0655 } }, 267 { 0x0626, 0, kReplaceCurWithTwo, { 0x064A, 0x0654 } }, 268 { 0x064B, 27, 0, { 0 } }, 269 { 0x064C, 28, 0, { 0 } }, 270 { 0x064D, 29, 0, { 0 } }, 271 { 0x064E, 30, 0, { 0 } }, 272 { 0x064F, 31, 0, { 0 } }, 273 { 0x0650, 32, 0, { 0 } }, 274 { 0x0651, 33, 0, { 0 } }, 275 { 0x0652, 34, 0, { 0 } }, 276 { 0x0653, 230, 0, { 0 } }, 277 { 0x0654, 230, 0, { 0 } }, 278 { 0x0655, 220, 0, { 0 } }, 279 { 0x0670, 35, 0, { 0 } }, 280 { 0x06C0, 0, kReplaceCurWithTwo, { 0x06D5, 0x0654 } }, 281 { 0x06C2, 0, kReplaceCurWithTwo, { 0x06C1, 0x0654 } }, 282 { 0x06D3, 0, kReplaceCurWithTwo, { 0x06D2, 0x0654 } }, 283 { 0x06D6, 230, 0, { 0 } }, 284 { 0x06D7, 230, 0, { 0 } }, 285 { 0x06D8, 230, 0, { 0 } }, 286 { 0x06D9, 230, 0, { 0 } }, 287 { 0x06DA, 230, 0, { 0 } }, 288 { 0x06DB, 230, 0, { 0 } }, 289 { 0x06DC, 230, 0, { 0 } }, 290 { 0x06DF, 230, 0, { 0 } }, 291 { 0x06E0, 230, 0, { 0 } }, 292 { 0x06E1, 230, 0, { 0 } }, 293 { 0x06E2, 230, 0, { 0 } }, 294 { 0x06E3, 220, 0, { 0 } }, 295 { 0x06E4, 230, 0, { 0 } }, 296 { 0x06E7, 230, 0, { 0 } }, 297 { 0x06E8, 230, 0, { 0 } }, 298 { 0x06EA, 220, 0, { 0 } }, 299 { 0x06EB, 230, 0, { 0 } }, 300 { 0x06EC, 230, 0, { 0 } }, 301 { 0x06ED, 220, 0, { 0 } }, 302 { 0x0711, 36, 0, { 0 } }, 303 { 0x0730, 230, 0, { 0 } }, 304 { 0x0731, 220, 0, { 0 } }, 305 { 0x0732, 230, 0, { 0 } }, 306 { 0x0733, 230, 0, { 0 } }, 307 { 0x0734, 220, 0, { 0 } }, 308 { 0x0735, 230, 0, { 0 } }, 309 { 0x0736, 230, 0, { 0 } }, 310 { 0x0737, 220, 0, { 0 } }, 311 { 0x0738, 220, 0, { 0 } }, 312 { 0x0739, 220, 0, { 0 } }, 313 { 0x073A, 230, 0, { 0 } }, 314 { 0x073B, 220, 0, { 0 } }, 315 { 0x073C, 220, 0, { 0 } }, 316 { 0x073D, 230, 0, { 0 } }, 317 { 0x073E, 220, 0, { 0 } }, 318 { 0x073F, 230, 0, { 0 } }, 319 { 0x0740, 230, 0, { 0 } }, 320 { 0x0741, 230, 0, { 0 } }, 321 { 0x0742, 220, 0, { 0 } }, 322 { 0x0743, 230, 0, { 0 } }, 323 { 0x0744, 220, 0, { 0 } }, 324 { 0x0745, 230, 0, { 0 } }, 325 { 0x0746, 220, 0, { 0 } }, 326 { 0x0747, 230, 0, { 0 } }, 327 { 0x0748, 220, 0, { 0 } }, 328 { 0x0749, 230, 0, { 0 } }, 329 { 0x074A, 230, 0, { 0 } }, 330 { 0x093C, 7, 0, { 0 } }, 331 { 0x094D, 9, 0, { 0 } }, 332 { 0x0951, 230, 0, { 0 } }, 333 { 0x0952, 220, 0, { 0 } }, 334 { 0x0953, 230, 0, { 0 } }, 335 { 0x0954, 230, 0, { 0 } }, 336 { 0x09AC, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x09BC, 0x09B0 } }, 337 { 0x09BC, 7, 0, { 0 } }, 338 { 0x09CD, 9, 0, { 0 } }, 339 { 0x0A21, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0A3C, 0x0A5C } }, 340 { 0x0A33, 0, kReplaceCurWithTwo, { 0x0A32, 0x0A3C } }, 341 { 0x0A36, 0, kReplaceCurWithTwo, { 0x0A38, 0x0A3C } }, 342 { 0x0A3C, 7, 0, { 0 } }, 343 { 0x0A4D, 9, 0, { 0 } }, 344 { 0x0ABC, 7, 0, { 0 } }, 345 { 0x0ACD, 9, 0, { 0 } }, 346 { 0x0B2F, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0B3C, 0x0B5F } }, 347 { 0x0B3C, 7, 0, { 0 } }, 348 { 0x0B4D, 9, 0, { 0 } }, 349 { 0x0BCD, 9, 0, { 0 } }, 350 { 0x0C4D, 9, 0, { 0 } }, 351 { 0x0C55, 84, 0, { 0 } }, 352 { 0x0C56, 91, 0, { 0 } }, 353 { 0x0CCD, 9, 0, { 0 } }, 354 { 0x0D4D, 9, 0, { 0 } }, 355 { 0x0DCA, 9, 0, { 0 } }, 356 { 0x0DDA, 0, kReplaceCurWithTwo, { 0x0DD9, 0x0DCA } }, 357 { 0x0DDC, 0, kReplaceCurWithTwo, { 0x0DD9, 0x0DCF } }, 358 { 0x0DDD, 0, kReplaceCurWithThree, { 0x0DD9, 0x0DCF, 0x0DCA } }, 359 { 0x0DDE, 0, kReplaceCurWithTwo, { 0x0DD9, 0x0DDF } }, 360 { 0x0E38, 103, 0, { 0 } }, 361 { 0x0E39, 103, 0, { 0 } }, 362 { 0x0E3A, 9, 0, { 0 } }, 363 { 0x0E48, 107, 0, { 0 } }, 364 { 0x0E49, 107, 0, { 0 } }, 365 { 0x0E4A, 107, 0, { 0 } }, 366 { 0x0E4B, 107, 0, { 0 } }, 367 { 0x0E4D, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0E32, 0x0E33 } }, 368 { 0x0EB8, 118, 0, { 0 } }, 369 { 0x0EB9, 118, 0, { 0 } }, 370 { 0x0EC8, 122, 0, { 0 } }, 371 { 0x0EC9, 122, 0, { 0 } }, 372 { 0x0ECA, 122, 0, { 0 } }, 373 { 0x0ECB, 122, 0, { 0 } }, 374 { 0x0ECD, 0, kIfNextOneMatchesReplaceAllWithOne, { 0x0EB2, 0x0EB3 } }, 375 { 0x0F18, 220, 0, { 0 } }, 376 { 0x0F19, 220, 0, { 0 } }, 377 { 0x0F35, 220, 0, { 0 } }, 378 { 0x0F37, 220, 0, { 0 } }, 379 { 0x0F39, 216, 0, { 0 } }, 380 { 0x0F71, 129, 0, { 0 } }, 381 { 0x0F72, 130, 0, { 0 } }, 382 { 0x0F74, 132, 0, { 0 } }, 383 { 0x0F7A, 130, 0, { 0 } }, 384 { 0x0F7B, 130, 0, { 0 } }, 385 { 0x0F7C, 130, 0, { 0 } }, 386 { 0x0F7D, 130, 0, { 0 } }, 387 { 0x0F80, 130, 0, { 0 } }, 388 { 0x0F82, 230, 0, { 0 } }, 389 { 0x0F83, 230, 0, { 0 } }, 390 { 0x0F84, 9, 0, { 0 } }, 391 { 0x0F86, 230, 0, { 0 } }, 392 { 0x0F87, 230, 0, { 0 } }, 393 { 0x0FB2, 0, kIfNextTwoMatchReplaceAllWithOne, { 0x0F80, 0x0F71, 0x0F77 } }, 394 { 0x0FB3, 0, kIfNextTwoMatchReplaceAllWithOne, { 0x0F80, 0x0F71, 0x0F79 } }, 395 { 0x0FC6, 220, 0, { 0 } }, 396 { 0x1026, 0, kReplaceCurWithTwo, { 0x1025, 0x102E } }, 397 { 0x1037, 7, 0, { 0 } }, 398 { 0x1039, 9, 0, { 0 } }, 399 { 0x1714, 9, 0, { 0 } }, // new char in Uncode 3.2 400 { 0x1734, 9, 0, { 0 } }, // new char in Uncode 3.2 401 { 0x17D2, 9, 0, { 0 } }, 402 { 0x18A9, 228, 0, { 0 } }, 403 { 0x20D0, 230, 0, { 0 } }, 404 { 0x20D1, 230, 0, { 0 } }, 405 { 0x20D2, 1, 0, { 0 } }, 406 { 0x20D3, 1, 0, { 0 } }, 407 { 0x20D4, 230, 0, { 0 } }, 408 { 0x20D5, 230, 0, { 0 } }, 409 { 0x20D6, 230, 0, { 0 } }, 410 { 0x20D7, 230, 0, { 0 } }, 411 { 0x20D8, 1, 0, { 0 } }, 412 { 0x20D9, 1, 0, { 0 } }, 413 { 0x20DA, 1, 0, { 0 } }, 414 { 0x20DB, 230, 0, { 0 } }, 415 { 0x20DC, 230, 0, { 0 } }, 416 { 0x20E1, 230, 0, { 0 } }, 417 { 0x20E5, 1, 0, { 0 } }, // new char in Uncode 3.2 418 { 0x20E6, 1, 0, { 0 } }, // new char in Uncode 3.2 419 { 0x20E7, 230, 0, { 0 } }, // new char in Uncode 3.2 420 { 0x20E8, 220, 0, { 0 } }, // new char in Uncode 3.2 421 { 0x20E9, 230, 0, { 0 } }, // new char in Uncode 3.2 422 { 0x20EA, 1, 0, { 0 } }, // new char in Uncode 3.2 423 { 0x302A, 218, 0, { 0 } }, 424 { 0x302B, 228, 0, { 0 } }, 425 { 0x302C, 232, 0, { 0 } }, 426 { 0x302D, 222, 0, { 0 } }, 427 { 0x302E, 224, 0, { 0 } }, 428 { 0x302F, 224, 0, { 0 } }, 429 { 0x3099, 8, 0, { 0 } }, 430 { 0x309A, 8, 0, { 0 } }, 431 { 0xFB1D, 0, kReplaceCurWithTwo, { 0x05D9, 0x05B4 } }, 432 { 0xFB1E, 26, 0, { 0 } }, 433 { 0xFE20, 230, 0, { 0 } }, 434 { 0xFE21, 230, 0, { 0 } }, 435 { 0xFE22, 230, 0, { 0 } }, 436 { 0xFE23, 230, 0, { 0 } }, 437 { 0, 0, 0, { 0 } } 438}; 439 440enum { 441 kMaxRangeCount = 108, 442 kMaxReplaceDataCount = 256, 443 kIndexValuesPerLine = 16, 444 kReplDataValuesPerLine = 8 445}; 446 447static int8_t rangesIndex[kHiFieldEntryCount]; // if >= 0, then index into xxxRanges[] 448static u_int8_t classRanges[kMaxRangeCount][kLoFieldEntryCount]; 449static u_int8_t replRanges[kMaxRangeCount][kLoFieldEntryCount]; 450static u_int16_t rangesKey[kMaxRangeCount]; // remembers starting Unicode for range 451static u_int16_t replacementData[kMaxReplaceDataCount]; 452 453int main(int argc, char *argv[]) { 454 u_int32_t entryIndex, rangeIndex; 455 const UniCharClassAndRepl * classAndReplPtr; 456 int32_t rangeCount; 457 u_int32_t replDataCount; 458 459 // print header stuff 460 plog("/*\n"); 461 plog("\tFile:\t\tDecompData.h\n"); 462 plog("\tContains:\tData tables for use in FixDecomps (CatalogCheck.c)\n"); 463 plog("\tNote:\t\tThis file is generated automatically by running DecompMakeData\n"); 464 plog("*/\n"); 465 plog("#include \"DecompDataEnums.h\"\n\n"); 466 467 // initialize arrays 468 for (entryIndex = 0; entryIndex < kHiFieldEntryCount; entryIndex++) { 469 rangesIndex[entryIndex] = -1; 470 } 471 for (rangeIndex = 0; rangeIndex < kMaxRangeCount; rangeIndex++) { 472 for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) { 473 classRanges[rangeIndex][entryIndex] = 0; 474 replRanges[rangeIndex][entryIndex] = 0; 475 } 476 } 477 rangeCount = 0; 478 replDataCount = 0; 479 replacementData[replDataCount++] = 0; // need to start real data at index 1 480 481 // process data 482 for (classAndReplPtr = uCharClassAndRepl; classAndReplPtr->uChar != 0; classAndReplPtr++) { 483 u_int32_t matchAndReplacementCount, matchAndReplacementIndex; 484 u_int16_t shiftUChar = classAndReplPtr->uChar + kShiftUniCharOffset; 485 if (shiftUChar >= kShiftUniCharLimit) { 486 plog("Exceeded uChar range for 0x%04X\n", classAndReplPtr->uChar); 487 return 1; 488 } 489 entryIndex = shiftUChar >> kLoFieldBitSize; 490 if (rangesIndex[entryIndex] == -1) { 491 if (rangeCount >= kMaxRangeCount) { 492 plog("Exceeded max range count with 0x%04X\n", classAndReplPtr->uChar); 493 return 1; 494 } 495 rangesKey[rangeCount] = classAndReplPtr->uChar & ~kLoFieldMask; 496 rangesIndex[entryIndex] = rangeCount++; 497 } 498 entryIndex = shiftUChar & kLoFieldMask; 499 500 if (classAndReplPtr->combClass != 0) 501 classRanges[rangeCount - 1][entryIndex] = classAndReplPtr->combClass; 502 503 if (classAndReplPtr->action != 0) { 504 switch (classAndReplPtr->action) { 505 case kReplaceCurWithTwo: 506 case kIfNextOneMatchesReplaceAllWithOne: 507 matchAndReplacementCount = 2; 508 break; 509 case kReplaceCurWithThree: 510 case kIfNextOneMatchesReplaceAllWithTwo: 511 case kIfNextTwoMatchReplaceAllWithOne: 512 matchAndReplacementCount = 3; 513 break; 514 default: 515 matchAndReplacementCount = 0; 516 break; 517 } 518 if (replDataCount + matchAndReplacementCount >= kMaxReplaceDataCount) { 519 plog("Exceeded max replacement data count with 0x%04X\n", classAndReplPtr->uChar); 520 return 1; 521 } 522 replRanges[rangeCount - 1][entryIndex] = replDataCount; 523 replacementData[replDataCount++] = classAndReplPtr->action; 524 for (matchAndReplacementIndex = 0; matchAndReplacementIndex < matchAndReplacementCount; matchAndReplacementIndex++) { 525 replacementData[replDataCount++] = classAndReplPtr->matchAndReplacement[matchAndReplacementIndex]; 526 } 527 } 528 } 529 530 // print filled-in index 531 plog("static const int8_t classAndReplIndex[kHiFieldEntryCount] = {\n"); 532 for (entryIndex = 0; entryIndex < kHiFieldEntryCount; entryIndex++) { 533 char * formatPtr = (entryIndex + 1 < kHiFieldEntryCount)? "%2d,\t": "%2d\t"; 534 if (entryIndex % kIndexValuesPerLine == 0) // beginning of line, 535 plog("\t"); // print tab 536 plog(formatPtr, rangesIndex[entryIndex]); // print values 537 if ((entryIndex + 1) % kIndexValuesPerLine == 0) // end of line, print starting UniChar value 538 plog("// uChar 0x%04X-\n", (u_int16_t)(((entryIndex + 1 - kIndexValuesPerLine) << kLoFieldBitSize) - kShiftUniCharOffset) ); 539 } 540 plog("};\n\n"); 541 542 // print filled in class ranges 543 plog("static const u_int8_t combClassRanges[][kLoFieldEntryCount] = {\n", kLoFieldEntryCount); 544 for (rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++) { 545 plog("\t{\t"); 546 for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) { 547 char * formatPtr = (entryIndex + 1 < kLoFieldEntryCount)? "%3d,": "%3d"; 548 plog(formatPtr, classRanges[rangeIndex][entryIndex]); // print values 549 } 550 plog("\t},\t// uChar 0x%04X-\n", rangesKey[rangeIndex]); 551 } 552 plog("};\n\n"); 553 554 // print filled in repl ranges 555 plog("static const u_int8_t replaceRanges[][kLoFieldEntryCount] = {\n", kLoFieldEntryCount); 556 for (rangeIndex = 0; rangeIndex < rangeCount; rangeIndex++) { 557 plog("\t{\t"); 558 for (entryIndex = 0; entryIndex < kLoFieldEntryCount; entryIndex++) { 559 char * formatPtr = (entryIndex + 1 < kLoFieldEntryCount)? "%3d,": "%3d"; 560 plog(formatPtr, replRanges[rangeIndex][entryIndex]); // print values 561 } 562 plog("\t},\t// uChar 0x%04X-\n", rangesKey[rangeIndex]); 563 } 564 plog("};\n\n"); 565 566 // print filled in replacement data 567 plog("static const u_int16_t replaceData[] = {\n"); 568 for (entryIndex = 0; entryIndex < replDataCount; entryIndex++) { 569 char * formatPtr = (entryIndex + 1 < replDataCount)? "0x%04X,\t": "0x%04X\t"; 570 if (entryIndex % kReplDataValuesPerLine == 0) // beginning of line, 571 plog("\t"); // print tab 572 plog(formatPtr, replacementData[entryIndex]); // print values 573 if ((entryIndex + 1) % kReplDataValuesPerLine == 0 || entryIndex + 1 == replDataCount) // end of line, 574 plog("// index %d-\n", entryIndex & ~(kReplDataValuesPerLine-1) ); // print starting index value 575 } 576 plog("};\n\n"); 577 578 // print summary info 579 plog("// combClassData:\n"); 580 plog("// trimmed index: kHiFieldEntryCount(= %d) bytes\n", kHiFieldEntryCount); 581 plog("// ranges: 2 * %d ranges * kLoFieldEntryCount(= %d) bytes = %d\n", rangeCount, kLoFieldEntryCount, 2*rangeCount*kLoFieldEntryCount); 582 plog("// replData: %d entries * 2 = %d\n", replDataCount, 2*replDataCount); 583 plog("// total: %d\n\n", kHiFieldEntryCount + 2*rangeCount*kLoFieldEntryCount + 2*replDataCount); 584 585 return 0; 586} 587