1/********************************************************************** 2 mktable.c 3**********************************************************************/ 4/*- 5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <stdlib.h> 31#include <stdio.h> 32#include <locale.h> 33 34#define __USE_ISOC99 35#include <ctype.h> 36 37#include "regenc.h" 38 39#define ASCII 0 40#define UNICODE_ISO_8859_1 1 41#define ISO_8859_1 2 42#define ISO_8859_2 3 43#define ISO_8859_3 4 44#define ISO_8859_4 5 45#define ISO_8859_5 6 46#define ISO_8859_6 7 47#define ISO_8859_7 8 48#define ISO_8859_8 9 49#define ISO_8859_9 10 50#define ISO_8859_10 11 51#define ISO_8859_11 12 52#define ISO_8859_13 13 53#define ISO_8859_14 14 54#define ISO_8859_15 15 55#define ISO_8859_16 16 56#define KOI8 17 57#define KOI8_R 18 58 59typedef struct { 60 int num; 61 const char* name; 62} ENC_INFO; 63 64static ENC_INFO Info[] = { 65 { ASCII, "ASCII" }, 66 { UNICODE_ISO_8859_1, "UNICODE_ISO_8859_1" }, 67 { ISO_8859_1, "ISO_8859_1" }, 68 { ISO_8859_2, "ISO_8859_2" }, 69 { ISO_8859_3, "ISO_8859_3" }, 70 { ISO_8859_4, "ISO_8859_4" }, 71 { ISO_8859_5, "ISO_8859_5" }, 72 { ISO_8859_6, "ISO_8859_6" }, 73 { ISO_8859_7, "ISO_8859_7" }, 74 { ISO_8859_8, "ISO_8859_8" }, 75 { ISO_8859_9, "ISO_8859_9" }, 76 { ISO_8859_10, "ISO_8859_10" }, 77 { ISO_8859_11, "ISO_8859_11" }, 78 { ISO_8859_13, "ISO_8859_13" }, 79 { ISO_8859_14, "ISO_8859_14" }, 80 { ISO_8859_15, "ISO_8859_15" }, 81 { ISO_8859_16, "ISO_8859_16" }, 82 { KOI8, "KOI8" }, 83 { KOI8_R, "KOI8_R" } 84}; 85 86 87static int IsAlpha(int enc, int c) 88{ 89 if (enc == ASCII) 90 return isalpha(c); 91 92 if (c >= 0x41 && c <= 0x5a) return 1; 93 if (c >= 0x61 && c <= 0x7a) return 1; 94 95 switch (enc) { 96 case UNICODE_ISO_8859_1: 97 case ISO_8859_1: 98 case ISO_8859_9: 99 if (c == 0xaa) return 1; 100 if (c == 0xb5) return 1; 101 if (c == 0xba) return 1; 102 if (c >= 0xc0 && c <= 0xd6) return 1; 103 if (c >= 0xd8 && c <= 0xf6) return 1; 104 if (c >= 0xf8 && c <= 0xff) return 1; 105 break; 106 107 case ISO_8859_2: 108 if (c == 0xa1 || c == 0xa3) return 1; 109 if (c == 0xa5 || c == 0xa6) return 1; 110 if (c >= 0xa9 && c <= 0xac) return 1; 111 if (c >= 0xae && c <= 0xaf) return 1; 112 if (c == 0xb1 || c == 0xb3) return 1; 113 if (c == 0xb5 || c == 0xb6) return 1; 114 if (c >= 0xb9 && c <= 0xbc) return 1; 115 if (c >= 0xbe && c <= 0xbf) return 1; 116 if (c >= 0xc0 && c <= 0xd6) return 1; 117 if (c >= 0xd8 && c <= 0xf6) return 1; 118 if (c >= 0xf8 && c <= 0xfe) return 1; 119 break; 120 121 case ISO_8859_3: 122 if (c == 0xa1) return 1; 123 if (c == 0xa6) return 1; 124 if (c >= 0xa9 && c <= 0xac) return 1; 125 if (c == 0xaf) return 1; 126 if (c == 0xb1) return 1; 127 if (c == 0xb5 || c == 0xb6) return 1; 128 if (c >= 0xb9 && c <= 0xbc) return 1; 129 if (c == 0xbf) return 1; 130 if (c >= 0xc0 && c <= 0xc2) return 1; 131 if (c >= 0xc4 && c <= 0xcf) return 1; 132 if (c >= 0xd1 && c <= 0xd6) return 1; 133 if (c >= 0xd8 && c <= 0xe2) return 1; 134 if (c >= 0xe4 && c <= 0xef) return 1; 135 if (c >= 0xf1 && c <= 0xf6) return 1; 136 if (c >= 0xf8 && c <= 0xfe) return 1; 137 break; 138 139 case ISO_8859_4: 140 if (c >= 0xa1 && c <= 0xa3) return 1; 141 if (c == 0xa5 || c == 0xa6) return 1; 142 if (c >= 0xa9 && c <= 0xac) return 1; 143 if (c == 0xae) return 1; 144 if (c == 0xb1 || c == 0xb3) return 1; 145 if (c == 0xb5 || c == 0xb6) return 1; 146 if (c >= 0xb9 && c <= 0xbf) return 1; 147 if (c >= 0xc0 && c <= 0xd6) return 1; 148 if (c >= 0xd8 && c <= 0xf6) return 1; 149 if (c >= 0xf8 && c <= 0xfe) return 1; 150 break; 151 152 case ISO_8859_5: 153 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; 154 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; 155 break; 156 157 case ISO_8859_6: 158 if (c >= 0xc1 && c <= 0xda) return 1; 159 if (c >= 0xe0 && c <= 0xf2) return 1; 160 break; 161 162 case ISO_8859_7: 163 if (c == 0xb6) return 1; 164 if (c >= 0xb8 && c <= 0xba) return 1; 165 if (c == 0xbc) return 1; 166 if (c >= 0xbe && c <= 0xbf) return 1; 167 if (c == 0xc0) return 1; 168 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; 169 if (c >= 0xdc && c <= 0xfe) return 1; 170 break; 171 172 case ISO_8859_8: 173 if (c == 0xb5) return 1; 174 if (c >= 0xe0 && c <= 0xfa) return 1; 175 break; 176 177 case ISO_8859_10: 178 if (c >= 0xa1 && c <= 0xa6) return 1; 179 if (c >= 0xa8 && c <= 0xac) return 1; 180 if (c == 0xae || c == 0xaf) return 1; 181 if (c >= 0xb1 && c <= 0xb6) return 1; 182 if (c >= 0xb8 && c <= 0xbc) return 1; 183 if (c >= 0xbe && c <= 0xff) return 1; 184 break; 185 186 case ISO_8859_11: 187 if (c >= 0xa1 && c <= 0xda) return 1; 188 if (c >= 0xdf && c <= 0xfb) return 1; 189 break; 190 191 case ISO_8859_13: 192 if (c == 0xa8) return 1; 193 if (c == 0xaa) return 1; 194 if (c == 0xaf) return 1; 195 if (c == 0xb5) return 1; 196 if (c == 0xb8) return 1; 197 if (c == 0xba) return 1; 198 if (c >= 0xbf && c <= 0xd6) return 1; 199 if (c >= 0xd8 && c <= 0xf6) return 1; 200 if (c >= 0xf8 && c <= 0xfe) return 1; 201 break; 202 203 case ISO_8859_14: 204 if (c == 0xa1 || c == 0xa2) return 1; 205 if (c == 0xa4 || c == 0xa5) return 1; 206 if (c == 0xa6 || c == 0xa8) return 1; 207 if (c >= 0xaa && c <= 0xac) return 1; 208 if (c >= 0xaf && c <= 0xb5) return 1; 209 if (c >= 0xb7 && c <= 0xff) return 1; 210 break; 211 212 case ISO_8859_15: 213 if (c == 0xaa) return 1; 214 if (c == 0xb5) return 1; 215 if (c == 0xba) return 1; 216 if (c >= 0xc0 && c <= 0xd6) return 1; 217 if (c >= 0xd8 && c <= 0xf6) return 1; 218 if (c >= 0xf8 && c <= 0xff) return 1; 219 if (c == 0xa6) return 1; 220 if (c == 0xa8) return 1; 221 if (c == 0xb4) return 1; 222 if (c == 0xb8) return 1; 223 if (c == 0xbc) return 1; 224 if (c == 0xbd) return 1; 225 if (c == 0xbe) return 1; 226 break; 227 228 case ISO_8859_16: 229 if (c == 0xa1) return 1; 230 if (c == 0xa2) return 1; 231 if (c == 0xa3) return 1; 232 if (c == 0xa6) return 1; 233 if (c == 0xa8) return 1; 234 if (c == 0xaa) return 1; 235 if (c == 0xac) return 1; 236 if (c == 0xae) return 1; 237 if (c == 0xaf) return 1; 238 if (c == 0xb2) return 1; 239 if (c == 0xb3) return 1; 240 if (c == 0xb4) return 1; 241 if (c >= 0xb8 && c <= 0xba) return 1; 242 if (c == 0xbc) return 1; 243 if (c == 0xbd) return 1; 244 if (c == 0xbe) return 1; 245 if (c == 0xbf) return 1; 246 if (c >= 0xc0 && c <= 0xde) return 1; 247 if (c >= 0xdf && c <= 0xff) return 1; 248 break; 249 250 case KOI8_R: 251 if (c == 0xa3 || c == 0xb3) return 1; 252 /* fall */ 253 case KOI8: 254 if (c >= 0xc0 && c <= 0xff) return 1; 255 break; 256 257 default: 258 exit(-1); 259 } 260 261 return 0; 262} 263 264static int IsBlank(int enc, int c) 265{ 266 if (enc == ASCII) 267 return isblank(c); 268 269 if (c == 0x09 || c == 0x20) return 1; 270 271 switch (enc) { 272 case UNICODE_ISO_8859_1: 273 case ISO_8859_1: 274 case ISO_8859_2: 275 case ISO_8859_3: 276 case ISO_8859_4: 277 case ISO_8859_5: 278 case ISO_8859_6: 279 case ISO_8859_7: 280 case ISO_8859_8: 281 case ISO_8859_9: 282 case ISO_8859_10: 283 case ISO_8859_11: 284 case ISO_8859_13: 285 case ISO_8859_14: 286 case ISO_8859_15: 287 case ISO_8859_16: 288 case KOI8: 289 if (c == 0xa0) return 1; 290 break; 291 292 case KOI8_R: 293 if (c == 0x9a) return 1; 294 break; 295 296 default: 297 exit(-1); 298 } 299 300 return 0; 301} 302 303static int IsCntrl(int enc, int c) 304{ 305 if (enc == ASCII) 306 return iscntrl(c); 307 308 if (c >= 0x00 && c <= 0x1F) return 1; 309 310 switch (enc) { 311 case UNICODE_ISO_8859_1: 312 if (c == 0xad) return 1; 313 /* fall */ 314 case ISO_8859_1: 315 case ISO_8859_2: 316 case ISO_8859_3: 317 case ISO_8859_4: 318 case ISO_8859_5: 319 case ISO_8859_6: 320 case ISO_8859_7: 321 case ISO_8859_8: 322 case ISO_8859_9: 323 case ISO_8859_10: 324 case ISO_8859_11: 325 case ISO_8859_13: 326 case ISO_8859_14: 327 case ISO_8859_15: 328 case ISO_8859_16: 329 case KOI8: 330 if (c >= 0x7f && c <= 0x9F) return 1; 331 break; 332 333 334 case KOI8_R: 335 if (c == 0x7f) return 1; 336 break; 337 338 default: 339 exit(-1); 340 } 341 342 return 0; 343} 344 345static int IsDigit(int enc ARG_UNUSED, int c) 346{ 347 if (c >= 0x30 && c <= 0x39) return 1; 348 return 0; 349} 350 351static int IsGraph(int enc, int c) 352{ 353 if (enc == ASCII) 354 return isgraph(c); 355 356 if (c >= 0x21 && c <= 0x7e) return 1; 357 358 switch (enc) { 359 case UNICODE_ISO_8859_1: 360 case ISO_8859_1: 361 case ISO_8859_2: 362 case ISO_8859_4: 363 case ISO_8859_5: 364 case ISO_8859_9: 365 case ISO_8859_10: 366 case ISO_8859_13: 367 case ISO_8859_14: 368 case ISO_8859_15: 369 case ISO_8859_16: 370 if (c >= 0xa1 && c <= 0xff) return 1; 371 break; 372 373 case ISO_8859_3: 374 if (c >= 0xa1) { 375 if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || 376 c == 0xe3 || c == 0xf0) 377 return 0; 378 else 379 return 1; 380 } 381 break; 382 383 case ISO_8859_6: 384 if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) 385 return 1; 386 if (c >= 0xc1 && c <= 0xda) return 1; 387 if (c >= 0xe0 && c <= 0xf2) return 1; 388 break; 389 390 case ISO_8859_7: 391 if (c >= 0xa1 && c <= 0xfe && 392 c != 0xa4 && c != 0xa5 && c != 0xaa && 393 c != 0xae && c != 0xd2) return 1; 394 break; 395 396 case ISO_8859_8: 397 if (c >= 0xa2 && c <= 0xfa) { 398 if (c >= 0xbf && c <= 0xde) return 0; 399 return 1; 400 } 401 break; 402 403 case ISO_8859_11: 404 if (c >= 0xa1 && c <= 0xda) return 1; 405 if (c >= 0xdf && c <= 0xfb) return 1; 406 break; 407 408 case KOI8: 409 if (c >= 0xc0 && c <= 0xff) return 1; 410 break; 411 412 case KOI8_R: 413 if (c >= 0x80 && c <= 0xff && c != 0x9a) return 1; 414 break; 415 416 default: 417 exit(-1); 418 } 419 420 return 0; 421} 422 423static int IsLower(int enc, int c) 424{ 425 if (enc == ASCII) 426 return islower(c); 427 428 if (c >= 0x61 && c <= 0x7a) return 1; 429 430 switch (enc) { 431 case UNICODE_ISO_8859_1: 432 case ISO_8859_1: 433 case ISO_8859_9: 434 if (c == 0xaa) return 1; 435 if (c == 0xb5) return 1; 436 if (c == 0xba) return 1; 437 if (c >= 0xdf && c <= 0xf6) return 1; 438 if (c >= 0xf8 && c <= 0xff) return 1; 439 break; 440 441 case ISO_8859_2: 442 if (c == 0xb1 || c == 0xb3) return 1; 443 if (c == 0xb5 || c == 0xb6) return 1; 444 if (c >= 0xb9 && c <= 0xbc) return 1; 445 if (c >= 0xbe && c <= 0xbf) return 1; 446 if (c >= 0xdf && c <= 0xf6) return 1; 447 if (c >= 0xf8 && c <= 0xfe) return 1; 448 break; 449 450 case ISO_8859_3: 451 if (c == 0xb1) return 1; 452 if (c == 0xb5 || c == 0xb6) return 1; 453 if (c >= 0xb9 && c <= 0xbc) return 1; 454 if (c == 0xbf) return 1; 455 if (c == 0xdf) return 1; 456 if (c >= 0xe0 && c <= 0xe2) return 1; 457 if (c >= 0xe4 && c <= 0xef) return 1; 458 if (c >= 0xf1 && c <= 0xf6) return 1; 459 if (c >= 0xf8 && c <= 0xfe) return 1; 460 break; 461 462 case ISO_8859_4: 463 if (c == 0xa2) return 1; 464 if (c == 0xb1 || c == 0xb3) return 1; 465 if (c == 0xb5 || c == 0xb6) return 1; 466 if (c >= 0xb9 && c <= 0xbc) return 1; 467 if (c >= 0xbe && c <= 0xbf) return 1; 468 if (c == 0xdf) return 1; 469 if (c >= 0xe0 && c <= 0xf6) return 1; 470 if (c >= 0xf8 && c <= 0xfe) return 1; 471 break; 472 473 case ISO_8859_5: 474 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; 475 break; 476 477 case ISO_8859_6: 478 break; 479 480 case ISO_8859_7: 481 if (c == 0xc0) return 1; 482 if (c >= 0xdc && c <= 0xfe) return 1; 483 break; 484 485 case ISO_8859_8: 486 if (c == 0xb5) return 1; 487 break; 488 489 case ISO_8859_10: 490 if (c >= 0xb1 && c <= 0xb6) return 1; 491 if (c >= 0xb8 && c <= 0xbc) return 1; 492 if (c == 0xbe || c == 0xbf) return 1; 493 if (c >= 0xdf && c <= 0xff) return 1; 494 break; 495 496 case ISO_8859_11: 497 break; 498 499 case ISO_8859_13: 500 if (c == 0xb5) return 1; 501 if (c == 0xb8) return 1; 502 if (c == 0xba) return 1; 503 if (c == 0xbf) return 1; 504 if (c >= 0xdf && c <= 0xf6) return 1; 505 if (c >= 0xf8 && c <= 0xfe) return 1; 506 break; 507 508 case ISO_8859_14: 509 if (c == 0xa2) return 1; 510 if (c == 0xa5) return 1; 511 if (c == 0xab) return 1; 512 if (c == 0xb1 || c == 0xb3 || c == 0xb5) return 1; 513 if (c >= 0xb8 && c <= 0xba) return 1; 514 if (c == 0xbc) return 1; 515 if (c == 0xbe || c == 0xbf) return 1; 516 if (c >= 0xdf && c <= 0xff) return 1; 517 break; 518 519 case ISO_8859_15: 520 if (c == 0xaa) return 1; 521 if (c == 0xb5) return 1; 522 if (c == 0xba) return 1; 523 if (c >= 0xdf && c <= 0xf6) return 1; 524 if (c >= 0xf8 && c <= 0xff) return 1; 525 if (c == 0xa8) return 1; 526 if (c == 0xb8) return 1; 527 if (c == 0xbd) return 1; 528 break; 529 530 case ISO_8859_16: 531 if (c == 0xa2) return 1; 532 if (c == 0xa8) return 1; 533 if (c == 0xae) return 1; 534 if (c == 0xb3) return 1; 535 if (c >= 0xb8 && c <= 0xba) return 1; 536 if (c == 0xbd) return 1; 537 if (c == 0xbf) return 1; 538 if (c >= 0xdf && c <= 0xff) return 1; 539 break; 540 541 case KOI8_R: 542 if (c == 0xa3) return 1; 543 /* fall */ 544 case KOI8: 545 if (c >= 0xc0 && c <= 0xdf) return 1; 546 break; 547 548 default: 549 exit(-1); 550 } 551 552 return 0; 553} 554 555static int IsPrint(int enc, int c) 556{ 557 if (enc == ASCII) 558 return isprint(c); 559 560 if (c >= 0x20 && c <= 0x7e) return 1; 561 562 switch (enc) { 563 case UNICODE_ISO_8859_1: 564 /* if (c >= 0x09 && c <= 0x0d) return 1; */ 565 if (c == 0x85) return 1; 566 /* fall */ 567 case ISO_8859_1: 568 case ISO_8859_2: 569 case ISO_8859_4: 570 case ISO_8859_5: 571 case ISO_8859_9: 572 case ISO_8859_10: 573 case ISO_8859_13: 574 case ISO_8859_14: 575 case ISO_8859_15: 576 case ISO_8859_16: 577 if (c >= 0xa0 && c <= 0xff) return 1; 578 break; 579 580 case ISO_8859_3: 581 if (c >= 0xa0) { 582 if (c == 0xa5 || c == 0xae || c == 0xbe || c == 0xc3 || c == 0xd0 || 583 c == 0xe3 || c == 0xf0) 584 return 0; 585 else 586 return 1; 587 } 588 break; 589 590 case ISO_8859_6: 591 if (c == 0xa0) return 1; 592 if (c == 0xa4 || c == 0xac || c == 0xad || c == 0xbb || c == 0xbf) 593 return 1; 594 if (c >= 0xc1 && c <= 0xda) return 1; 595 if (c >= 0xe0 && c <= 0xf2) return 1; 596 break; 597 598 case ISO_8859_7: 599 if (c >= 0xa0 && c <= 0xfe && 600 c != 0xa4 && c != 0xa5 && c != 0xaa && 601 c != 0xae && c != 0xd2) return 1; 602 break; 603 604 case ISO_8859_8: 605 if (c >= 0xa0 && c <= 0xfa) { 606 if (c >= 0xbf && c <= 0xde) return 0; 607 if (c == 0xa1) return 0; 608 return 1; 609 } 610 break; 611 612 case ISO_8859_11: 613 if (c >= 0xa0 && c <= 0xda) return 1; 614 if (c >= 0xdf && c <= 0xfb) return 1; 615 break; 616 617 case KOI8: 618 if (c == 0xa0) return 1; 619 if (c >= 0xc0 && c <= 0xff) return 1; 620 break; 621 622 case KOI8_R: 623 if (c >= 0x80 && c <= 0xff) return 1; 624 break; 625 626 default: 627 exit(-1); 628 } 629 630 return 0; 631} 632 633static int IsPunct(int enc, int c) 634{ 635 if (enc == ASCII) 636 return ispunct(c); 637 638 if (enc == UNICODE_ISO_8859_1) { 639 if (c == 0x24 || c == 0x2b || c == 0x5e || c == 0x60 || 640 c == 0x7c || c == 0x7e) return 1; 641 if (c >= 0x3c && c <= 0x3e) return 1; 642 } 643 644 if (c >= 0x21 && c <= 0x2f) return 1; 645 if (c >= 0x3a && c <= 0x40) return 1; 646 if (c >= 0x5b && c <= 0x60) return 1; 647 if (c >= 0x7b && c <= 0x7e) return 1; 648 649 switch (enc) { 650 case ISO_8859_1: 651 case ISO_8859_9: 652 case ISO_8859_15: 653 if (c == 0xad) return 1; 654 /* fall */ 655 case UNICODE_ISO_8859_1: 656 if (c == 0xa1) return 1; 657 if (c == 0xab) return 1; 658 if (c == 0xb7) return 1; 659 if (c == 0xbb) return 1; 660 if (c == 0xbf) return 1; 661 break; 662 663 case ISO_8859_2: 664 case ISO_8859_4: 665 case ISO_8859_5: 666 case ISO_8859_14: 667 if (c == 0xad) return 1; 668 break; 669 670 case ISO_8859_3: 671 case ISO_8859_10: 672 if (c == 0xad) return 1; 673 if (c == 0xb7) return 1; 674 if (c == 0xbd) return 1; 675 break; 676 677 case ISO_8859_6: 678 if (c == 0xac) return 1; 679 if (c == 0xad) return 1; 680 if (c == 0xbb) return 1; 681 if (c == 0xbf) return 1; 682 break; 683 684 case ISO_8859_7: 685 if (c == 0xa1 || c == 0xa2) return 1; 686 if (c == 0xab) return 1; 687 if (c == 0xaf) return 1; 688 if (c == 0xad) return 1; 689 if (c == 0xb7 || c == 0xbb) return 1; 690 break; 691 692 case ISO_8859_8: 693 if (c == 0xab) return 1; 694 if (c == 0xad) return 1; 695 if (c == 0xb7) return 1; 696 if (c == 0xbb) return 1; 697 if (c == 0xdf) return 1; 698 break; 699 700 case ISO_8859_13: 701 if (c == 0xa1 || c == 0xa5) return 1; 702 if (c == 0xab || c == 0xad) return 1; 703 if (c == 0xb4 || c == 0xb7) return 1; 704 if (c == 0xbb) return 1; 705 if (c == 0xff) return 1; 706 break; 707 708 case ISO_8859_16: 709 if (c == 0xa5) return 1; 710 if (c == 0xab) return 1; 711 if (c == 0xad) return 1; 712 if (c == 0xb5) return 1; 713 if (c == 0xb7) return 1; 714 if (c == 0xbb) return 1; 715 break; 716 717 case KOI8_R: 718 if (c == 0x9e) return 1; 719 break; 720 721 case ISO_8859_11: 722 case KOI8: 723 break; 724 725 default: 726 exit(-1); 727 } 728 729 return 0; 730} 731 732static int IsSpace(int enc, int c) 733{ 734 if (enc == ASCII) 735 return isspace(c); 736 737 if (c >= 0x09 && c <= 0x0d) return 1; 738 if (c == 0x20) return 1; 739 740 switch (enc) { 741 case UNICODE_ISO_8859_1: 742 if (c == 0x85) return 1; 743 /* fall */ 744 case ISO_8859_1: 745 case ISO_8859_2: 746 case ISO_8859_3: 747 case ISO_8859_4: 748 case ISO_8859_5: 749 case ISO_8859_6: 750 case ISO_8859_7: 751 case ISO_8859_8: 752 case ISO_8859_9: 753 case ISO_8859_10: 754 case ISO_8859_11: 755 case ISO_8859_13: 756 case ISO_8859_14: 757 case ISO_8859_15: 758 case ISO_8859_16: 759 case KOI8: 760 if (c == 0xa0) return 1; 761 break; 762 763 case KOI8_R: 764 if (c == 0x9a) return 1; 765 break; 766 767 default: 768 exit(-1); 769 } 770 771 return 0; 772} 773 774static int IsUpper(int enc, int c) 775{ 776 if (enc == ASCII) 777 return isupper(c); 778 779 if (c >= 0x41 && c <= 0x5a) return 1; 780 781 switch (enc) { 782 case UNICODE_ISO_8859_1: 783 case ISO_8859_1: 784 case ISO_8859_9: 785 if (c >= 0xc0 && c <= 0xd6) return 1; 786 if (c >= 0xd8 && c <= 0xde) return 1; 787 break; 788 789 case ISO_8859_2: 790 if (c == 0xa1 || c == 0xa3) return 1; 791 if (c == 0xa5 || c == 0xa6) return 1; 792 if (c >= 0xa9 && c <= 0xac) return 1; 793 if (c >= 0xae && c <= 0xaf) return 1; 794 if (c >= 0xc0 && c <= 0xd6) return 1; 795 if (c >= 0xd8 && c <= 0xde) return 1; 796 break; 797 798 case ISO_8859_3: 799 if (c == 0xa1) return 1; 800 if (c == 0xa6) return 1; 801 if (c >= 0xa9 && c <= 0xac) return 1; 802 if (c == 0xaf) return 1; 803 if (c >= 0xc0 && c <= 0xc2) return 1; 804 if (c >= 0xc4 && c <= 0xcf) return 1; 805 if (c >= 0xd1 && c <= 0xd6) return 1; 806 if (c >= 0xd8 && c <= 0xde) return 1; 807 break; 808 809 case ISO_8859_4: 810 if (c == 0xa1 || c == 0xa3) return 1; 811 if (c == 0xa5 || c == 0xa6) return 1; 812 if (c >= 0xa9 && c <= 0xac) return 1; 813 if (c == 0xae) return 1; 814 if (c == 0xbd) return 1; 815 if (c >= 0xc0 && c <= 0xd6) return 1; 816 if (c >= 0xd8 && c <= 0xde) return 1; 817 break; 818 819 case ISO_8859_5: 820 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; 821 break; 822 823 case ISO_8859_6: 824 break; 825 826 case ISO_8859_7: 827 if (c == 0xb6) return 1; 828 if (c >= 0xb8 && c <= 0xba) return 1; 829 if (c == 0xbc) return 1; 830 if (c >= 0xbe && c <= 0xbf) return 1; 831 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; 832 break; 833 834 case ISO_8859_8: 835 case ISO_8859_11: 836 break; 837 838 case ISO_8859_10: 839 if (c >= 0xa1 && c <= 0xa6) return 1; 840 if (c >= 0xa8 && c <= 0xac) return 1; 841 if (c == 0xae || c == 0xaf) return 1; 842 if (c >= 0xc0 && c <= 0xde) return 1; 843 break; 844 845 case ISO_8859_13: 846 if (c == 0xa8) return 1; 847 if (c == 0xaa) return 1; 848 if (c == 0xaf) return 1; 849 if (c >= 0xc0 && c <= 0xd6) return 1; 850 if (c >= 0xd8 && c <= 0xde) return 1; 851 break; 852 853 case ISO_8859_14: 854 if (c == 0xa1) return 1; 855 if (c == 0xa4 || c == 0xa6) return 1; 856 if (c == 0xa8) return 1; 857 if (c == 0xaa || c == 0xac) return 1; 858 if (c == 0xaf || c == 0xb0) return 1; 859 if (c == 0xb2 || c == 0xb4 || c == 0xb7) return 1; 860 if (c == 0xbb || c == 0xbd) return 1; 861 if (c >= 0xc0 && c <= 0xde) return 1; 862 break; 863 864 case ISO_8859_15: 865 if (c >= 0xc0 && c <= 0xd6) return 1; 866 if (c >= 0xd8 && c <= 0xde) return 1; 867 if (c == 0xa6) return 1; 868 if (c == 0xb4) return 1; 869 if (c == 0xbc) return 1; 870 if (c == 0xbe) return 1; 871 break; 872 873 case ISO_8859_16: 874 if (c == 0xa1) return 1; 875 if (c == 0xa3) return 1; 876 if (c == 0xa6) return 1; 877 if (c == 0xaa) return 1; 878 if (c == 0xac) return 1; 879 if (c == 0xaf) return 1; 880 if (c == 0xb2) return 1; 881 if (c == 0xb4) return 1; 882 if (c == 0xbc) return 1; 883 if (c == 0xbe) return 1; 884 if (c >= 0xc0 && c <= 0xde) return 1; 885 break; 886 887 case KOI8_R: 888 if (c == 0xb3) return 1; 889 /* fall */ 890 case KOI8: 891 if (c >= 0xe0 && c <= 0xff) return 1; 892 break; 893 894 default: 895 exit(-1); 896 } 897 898 return 0; 899} 900 901static int IsXDigit(int enc, int c) 902{ 903 if (enc == ASCII) 904 return isxdigit(c); 905 906 if (c >= 0x30 && c <= 0x39) return 1; 907 if (c >= 0x41 && c <= 0x46) return 1; 908 if (c >= 0x61 && c <= 0x66) return 1; 909 return 0; 910} 911 912static int IsWord(int enc, int c) 913{ 914 if (enc == ASCII) { 915 return (isalpha(c) || isdigit(c) || c == 0x5f); 916 } 917 918 if (c >= 0x30 && c <= 0x39) return 1; 919 if (c >= 0x41 && c <= 0x5a) return 1; 920 if (c == 0x5f) return 1; 921 if (c >= 0x61 && c <= 0x7a) return 1; 922 923 switch (enc) { 924 case UNICODE_ISO_8859_1: 925 case ISO_8859_1: 926 case ISO_8859_9: 927 if (c == 0xaa) return 1; 928 if (c >= 0xb2 && c <= 0xb3) return 1; 929 if (c == 0xb5) return 1; 930 if (c >= 0xb9 && c <= 0xba) return 1; 931 if (c >= 0xbc && c <= 0xbe) return 1; 932 if (c >= 0xc0 && c <= 0xd6) return 1; 933 if (c >= 0xd8 && c <= 0xf6) return 1; 934 if (c >= 0xf8 && c <= 0xff) return 1; 935 break; 936 937 case ISO_8859_2: 938 if (c == 0xa1 || c == 0xa3) return 1; 939 if (c == 0xa5 || c == 0xa6) return 1; 940 if (c >= 0xa9 && c <= 0xac) return 1; 941 if (c >= 0xae && c <= 0xaf) return 1; 942 if (c == 0xb1 || c == 0xb3) return 1; 943 if (c == 0xb5 || c == 0xb6) return 1; 944 if (c >= 0xb9 && c <= 0xbc) return 1; 945 if (c >= 0xbe && c <= 0xbf) return 1; 946 if (c >= 0xc0 && c <= 0xd6) return 1; 947 if (c >= 0xd8 && c <= 0xf6) return 1; 948 if (c >= 0xf8 && c <= 0xfe) return 1; 949 break; 950 951 case ISO_8859_3: 952 if (c == 0xa1) return 1; 953 if (c == 0xa6) return 1; 954 if (c >= 0xa9 && c <= 0xac) return 1; 955 if (c == 0xaf) return 1; 956 if (c >= 0xb1 && c <= 0xb3) return 1; 957 if (c == 0xb5 || c == 0xb6) return 1; 958 if (c >= 0xb9 && c <= 0xbd) return 1; 959 if (c == 0xbf) return 1; 960 if (c >= 0xc0 && c <= 0xc2) return 1; 961 if (c >= 0xc4 && c <= 0xcf) return 1; 962 if (c >= 0xd1 && c <= 0xd6) return 1; 963 if (c >= 0xd8 && c <= 0xe2) return 1; 964 if (c >= 0xe4 && c <= 0xef) return 1; 965 if (c >= 0xf1 && c <= 0xf6) return 1; 966 if (c >= 0xf8 && c <= 0xfe) return 1; 967 break; 968 969 case ISO_8859_4: 970 if (c >= 0xa1 && c <= 0xa3) return 1; 971 if (c == 0xa5 || c == 0xa6) return 1; 972 if (c >= 0xa9 && c <= 0xac) return 1; 973 if (c == 0xae) return 1; 974 if (c == 0xb1 || c == 0xb3) return 1; 975 if (c == 0xb5 || c == 0xb6) return 1; 976 if (c >= 0xb9 && c <= 0xbf) return 1; 977 if (c >= 0xc0 && c <= 0xd6) return 1; 978 if (c >= 0xd8 && c <= 0xf6) return 1; 979 if (c >= 0xf8 && c <= 0xfe) return 1; 980 break; 981 982 case ISO_8859_5: 983 if (c >= 0xa1 && c <= 0xcf && c != 0xad) return 1; 984 if (c >= 0xd0 && c <= 0xff && c != 0xf0 && c != 0xfd) return 1; 985 break; 986 987 case ISO_8859_6: 988 if (c >= 0xc1 && c <= 0xda) return 1; 989 if (c >= 0xe0 && c <= 0xea) return 1; 990 if (c >= 0xeb && c <= 0xf2) return 1; 991 break; 992 993 case ISO_8859_7: 994 if (c == 0xb2 || c == 0xb3) return 1; 995 if (c == 0xb6) return 1; 996 if (c >= 0xb8 && c <= 0xba) return 1; 997 if (c >= 0xbc && c <= 0xbf) return 1; 998 if (c == 0xc0) return 1; 999 if (c >= 0xc1 && c <= 0xdb && c != 0xd2) return 1; 1000 if (c >= 0xdc && c <= 0xfe) return 1; 1001 break; 1002 1003 case ISO_8859_8: 1004 if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; 1005 if (c >= 0xbc && c <= 0xbe) return 1; 1006 if (c >= 0xe0 && c <= 0xfa) return 1; 1007 break; 1008 1009 case ISO_8859_10: 1010 if (c >= 0xa1 && c <= 0xff) { 1011 if (c != 0xa7 && c != 0xad && c != 0xb0 && c != 0xb7 && c != 0xbd) 1012 return 1; 1013 } 1014 break; 1015 1016 case ISO_8859_11: 1017 if (c >= 0xa1 && c <= 0xda) return 1; 1018 if (c >= 0xdf && c <= 0xfb) return 1; 1019 break; 1020 1021 case ISO_8859_13: 1022 if (c == 0xa8) return 1; 1023 if (c == 0xaa) return 1; 1024 if (c == 0xaf) return 1; 1025 if (c == 0xb2 || c == 0xb3 || c == 0xb5 || c == 0xb9) return 1; 1026 if (c >= 0xbc && c <= 0xbe) return 1; 1027 if (c == 0xb8) return 1; 1028 if (c == 0xba) return 1; 1029 if (c >= 0xbf && c <= 0xd6) return 1; 1030 if (c >= 0xd8 && c <= 0xf6) return 1; 1031 if (c >= 0xf8 && c <= 0xfe) return 1; 1032 break; 1033 1034 case ISO_8859_14: 1035 if (c >= 0xa1 && c <= 0xff) { 1036 if (c == 0xa3 || c == 0xa7 || c == 0xa9 || c == 0xad || c == 0xae || 1037 c == 0xb6) return 0; 1038 return 1; 1039 } 1040 break; 1041 1042 case ISO_8859_15: 1043 if (c == 0xaa) return 1; 1044 if (c >= 0xb2 && c <= 0xb3) return 1; 1045 if (c == 0xb5) return 1; 1046 if (c >= 0xb9 && c <= 0xba) return 1; 1047 if (c >= 0xbc && c <= 0xbe) return 1; 1048 if (c >= 0xc0 && c <= 0xd6) return 1; 1049 if (c >= 0xd8 && c <= 0xf6) return 1; 1050 if (c >= 0xf8 && c <= 0xff) return 1; 1051 if (c == 0xa6) return 1; 1052 if (c == 0xa8) return 1; 1053 if (c == 0xb4) return 1; 1054 if (c == 0xb8) return 1; 1055 break; 1056 1057 case ISO_8859_16: 1058 if (c == 0xa1) return 1; 1059 if (c == 0xa2) return 1; 1060 if (c == 0xa3) return 1; 1061 if (c == 0xa6) return 1; 1062 if (c == 0xa8) return 1; 1063 if (c == 0xaa) return 1; 1064 if (c == 0xac) return 1; 1065 if (c == 0xae) return 1; 1066 if (c == 0xaf) return 1; 1067 if (c == 0xb2) return 1; 1068 if (c == 0xb3) return 1; 1069 if (c == 0xb4) return 1; 1070 if (c >= 0xb8 && c <= 0xba) return 1; 1071 if (c == 0xbc) return 1; 1072 if (c == 0xbd) return 1; 1073 if (c == 0xbe) return 1; 1074 if (c == 0xbf) return 1; 1075 if (c >= 0xc0 && c <= 0xde) return 1; 1076 if (c >= 0xdf && c <= 0xff) return 1; 1077 break; 1078 1079 case KOI8_R: 1080 if (c == 0x9d) return 1; 1081 if (c == 0xa3 || c == 0xb3) return 1; 1082 /* fall */ 1083 case KOI8: 1084 if (c >= 0xc0 && c <= 0xff) return 1; 1085 break; 1086 1087 default: 1088 exit(-1); 1089 } 1090 1091 return 0; 1092} 1093 1094static int IsAscii(int enc ARG_UNUSED, int c) 1095{ 1096 if (c >= 0x00 && c <= 0x7f) return 1; 1097 return 0; 1098} 1099 1100static int IsNewline(int enc ARG_UNUSED, int c) 1101{ 1102 if (c == 0x0a) return 1; 1103 return 0; 1104} 1105 1106static int exec(FILE* fp, ENC_INFO* einfo) 1107{ 1108#define NCOL 8 1109 1110 int c, val, enc; 1111 1112 enc = einfo->num; 1113 1114 fprintf(fp, "static const unsigned short Enc%s_CtypeTable[256] = {\n", 1115 einfo->name); 1116 1117 for (c = 0; c < 256; c++) { 1118 val = 0; 1119 if (IsNewline(enc, c)) val |= BIT_CTYPE_NEWLINE; 1120 if (IsAlpha (enc, c)) val |= (BIT_CTYPE_ALPHA | BIT_CTYPE_ALNUM); 1121 if (IsBlank (enc, c)) val |= BIT_CTYPE_BLANK; 1122 if (IsCntrl (enc, c)) val |= BIT_CTYPE_CNTRL; 1123 if (IsDigit (enc, c)) val |= (BIT_CTYPE_DIGIT | BIT_CTYPE_ALNUM); 1124 if (IsGraph (enc, c)) val |= BIT_CTYPE_GRAPH; 1125 if (IsLower (enc, c)) val |= BIT_CTYPE_LOWER; 1126 if (IsPrint (enc, c)) val |= BIT_CTYPE_PRINT; 1127 if (IsPunct (enc, c)) val |= BIT_CTYPE_PUNCT; 1128 if (IsSpace (enc, c)) val |= BIT_CTYPE_SPACE; 1129 if (IsUpper (enc, c)) val |= BIT_CTYPE_UPPER; 1130 if (IsXDigit(enc, c)) val |= BIT_CTYPE_XDIGIT; 1131 if (IsWord (enc, c)) val |= BIT_CTYPE_WORD; 1132 if (IsAscii (enc, c)) val |= BIT_CTYPE_ASCII; 1133 1134 if (c % NCOL == 0) fputs(" ", fp); 1135 fprintf(fp, "0x%04x", val); 1136 if (c != 255) fputs(",", fp); 1137 if (c != 0 && c % NCOL == (NCOL-1)) 1138 fputs("\n", fp); 1139 else 1140 fputs(" ", fp); 1141 } 1142 fprintf(fp, "};\n"); 1143 return 0; 1144} 1145 1146extern int main(int argc ARG_UNUSED, char* argv[] ARG_UNUSED) 1147{ 1148 int i; 1149 FILE* fp = stdout; 1150 1151 setlocale(LC_ALL, "C"); 1152 /* setlocale(LC_ALL, "POSIX"); */ 1153 /* setlocale(LC_ALL, "en_GB.iso88591"); */ 1154 /* setlocale(LC_ALL, "de_BE.iso88591"); */ 1155 /* setlocale(LC_ALL, "fr_FR.iso88591"); */ 1156 1157 for (i = 0; i < (int )(sizeof(Info)/sizeof(ENC_INFO)); i++) { 1158 exec(fp, &Info[i]); 1159 } 1160 1161 return 0; 1162} 1163