1/* vi:set ts=8 sts=4 sw=4: 2 * 3 * VIM - Vi IMproved by Bram Moolenaar 4 * 5 * Do ":help uganda" in Vim to read copying and usage conditions. 6 * Do ":help credits" in Vim to see a list of people who contributed. 7 * See README.txt for an overview of the Vim source code. 8 */ 9 10/* 11 * arabic.c: functions for Arabic language 12 * 13 * Included by main.c, when FEAT_ARABIC & FEAT_GUI is defined. 14 * 15 * -- 16 * 17 * Author: Nadim Shaikli & Isam Bayazidi 18 * 19 */ 20 21static int A_is_a __ARGS((int cur_c)); 22static int A_is_s __ARGS((int cur_c)); 23static int A_is_f __ARGS((int cur_c)); 24static int chg_c_a2s __ARGS((int cur_c)); 25static int chg_c_a2i __ARGS((int cur_c)); 26static int chg_c_a2m __ARGS((int cur_c)); 27static int chg_c_a2f __ARGS((int cur_c)); 28static int chg_c_i2m __ARGS((int cur_c)); 29static int chg_c_f2m __ARGS((int cur_c)); 30static int chg_c_laa2i __ARGS((int hid_c)); 31static int chg_c_laa2f __ARGS((int hid_c)); 32static int half_shape __ARGS((int c)); 33static int A_firstc_laa __ARGS((int c1, int c)); 34static int A_is_harakat __ARGS((int c)); 35static int A_is_iso __ARGS((int c)); 36static int A_is_formb __ARGS((int c)); 37static int A_is_ok __ARGS((int c)); 38static int A_is_valid __ARGS((int c)); 39static int A_is_special __ARGS((int c)); 40 41 42/* 43 * Returns True if c is an ISO-8859-6 shaped ARABIC letter (user entered) 44 */ 45 static int 46A_is_a(cur_c) 47 int cur_c; 48{ 49 switch (cur_c) 50 { 51 case a_HAMZA: 52 case a_ALEF_MADDA: 53 case a_ALEF_HAMZA_ABOVE: 54 case a_WAW_HAMZA: 55 case a_ALEF_HAMZA_BELOW: 56 case a_YEH_HAMZA: 57 case a_ALEF: 58 case a_BEH: 59 case a_TEH_MARBUTA: 60 case a_TEH: 61 case a_THEH: 62 case a_JEEM: 63 case a_HAH: 64 case a_KHAH: 65 case a_DAL: 66 case a_THAL: 67 case a_REH: 68 case a_ZAIN: 69 case a_SEEN: 70 case a_SHEEN: 71 case a_SAD: 72 case a_DAD: 73 case a_TAH: 74 case a_ZAH: 75 case a_AIN: 76 case a_GHAIN: 77 case a_TATWEEL: 78 case a_FEH: 79 case a_QAF: 80 case a_KAF: 81 case a_LAM: 82 case a_MEEM: 83 case a_NOON: 84 case a_HEH: 85 case a_WAW: 86 case a_ALEF_MAKSURA: 87 case a_YEH: 88 return TRUE; 89 } 90 91 return FALSE; 92} 93 94 95/* 96 * Returns True if c is an Isolated Form-B ARABIC letter 97 */ 98 static int 99A_is_s(cur_c) 100 int cur_c; 101{ 102 switch (cur_c) 103 { 104 case a_s_HAMZA: 105 case a_s_ALEF_MADDA: 106 case a_s_ALEF_HAMZA_ABOVE: 107 case a_s_WAW_HAMZA: 108 case a_s_ALEF_HAMZA_BELOW: 109 case a_s_YEH_HAMZA: 110 case a_s_ALEF: 111 case a_s_BEH: 112 case a_s_TEH_MARBUTA: 113 case a_s_TEH: 114 case a_s_THEH: 115 case a_s_JEEM: 116 case a_s_HAH: 117 case a_s_KHAH: 118 case a_s_DAL: 119 case a_s_THAL: 120 case a_s_REH: 121 case a_s_ZAIN: 122 case a_s_SEEN: 123 case a_s_SHEEN: 124 case a_s_SAD: 125 case a_s_DAD: 126 case a_s_TAH: 127 case a_s_ZAH: 128 case a_s_AIN: 129 case a_s_GHAIN: 130 case a_s_FEH: 131 case a_s_QAF: 132 case a_s_KAF: 133 case a_s_LAM: 134 case a_s_MEEM: 135 case a_s_NOON: 136 case a_s_HEH: 137 case a_s_WAW: 138 case a_s_ALEF_MAKSURA: 139 case a_s_YEH: 140 return TRUE; 141 } 142 143 return FALSE; 144} 145 146 147/* 148 * Returns True if c is a Final shape of an ARABIC letter 149 */ 150 static int 151A_is_f(cur_c) 152 int cur_c; 153{ 154 switch (cur_c) 155 { 156 case a_f_ALEF_MADDA: 157 case a_f_ALEF_HAMZA_ABOVE: 158 case a_f_WAW_HAMZA: 159 case a_f_ALEF_HAMZA_BELOW: 160 case a_f_YEH_HAMZA: 161 case a_f_ALEF: 162 case a_f_BEH: 163 case a_f_TEH_MARBUTA: 164 case a_f_TEH: 165 case a_f_THEH: 166 case a_f_JEEM: 167 case a_f_HAH: 168 case a_f_KHAH: 169 case a_f_DAL: 170 case a_f_THAL: 171 case a_f_REH: 172 case a_f_ZAIN: 173 case a_f_SEEN: 174 case a_f_SHEEN: 175 case a_f_SAD: 176 case a_f_DAD: 177 case a_f_TAH: 178 case a_f_ZAH: 179 case a_f_AIN: 180 case a_f_GHAIN: 181 case a_f_FEH: 182 case a_f_QAF: 183 case a_f_KAF: 184 case a_f_LAM: 185 case a_f_MEEM: 186 case a_f_NOON: 187 case a_f_HEH: 188 case a_f_WAW: 189 case a_f_ALEF_MAKSURA: 190 case a_f_YEH: 191 case a_f_LAM_ALEF_MADDA_ABOVE: 192 case a_f_LAM_ALEF_HAMZA_ABOVE: 193 case a_f_LAM_ALEF_HAMZA_BELOW: 194 case a_f_LAM_ALEF: 195 return TRUE; 196 } 197 return FALSE; 198} 199 200 201/* 202 * Change shape - from ISO-8859-6/Isolated to Form-B Isolated 203 */ 204 static int 205chg_c_a2s(cur_c) 206 int cur_c; 207{ 208 int tempc; 209 210 switch (cur_c) 211 { 212 case a_HAMZA: 213 tempc = a_s_HAMZA; 214 break; 215 case a_ALEF_MADDA: 216 tempc = a_s_ALEF_MADDA; 217 break; 218 case a_ALEF_HAMZA_ABOVE: 219 tempc = a_s_ALEF_HAMZA_ABOVE; 220 break; 221 case a_WAW_HAMZA: 222 tempc = a_s_WAW_HAMZA; 223 break; 224 case a_ALEF_HAMZA_BELOW: 225 tempc = a_s_ALEF_HAMZA_BELOW; 226 break; 227 case a_YEH_HAMZA: 228 tempc = a_s_YEH_HAMZA; 229 break; 230 case a_ALEF: 231 tempc = a_s_ALEF; 232 break; 233 case a_TEH_MARBUTA: 234 tempc = a_s_TEH_MARBUTA; 235 break; 236 case a_DAL: 237 tempc = a_s_DAL; 238 break; 239 case a_THAL: 240 tempc = a_s_THAL; 241 break; 242 case a_REH: 243 tempc = a_s_REH; 244 break; 245 case a_ZAIN: 246 tempc = a_s_ZAIN; 247 break; 248 case a_TATWEEL: /* exceptions */ 249 tempc = cur_c; 250 break; 251 case a_WAW: 252 tempc = a_s_WAW; 253 break; 254 case a_ALEF_MAKSURA: 255 tempc = a_s_ALEF_MAKSURA; 256 break; 257 case a_BEH: 258 tempc = a_s_BEH; 259 break; 260 case a_TEH: 261 tempc = a_s_TEH; 262 break; 263 case a_THEH: 264 tempc = a_s_THEH; 265 break; 266 case a_JEEM: 267 tempc = a_s_JEEM; 268 break; 269 case a_HAH: 270 tempc = a_s_HAH; 271 break; 272 case a_KHAH: 273 tempc = a_s_KHAH; 274 break; 275 case a_SEEN: 276 tempc = a_s_SEEN; 277 break; 278 case a_SHEEN: 279 tempc = a_s_SHEEN; 280 break; 281 case a_SAD: 282 tempc = a_s_SAD; 283 break; 284 case a_DAD: 285 tempc = a_s_DAD; 286 break; 287 case a_TAH: 288 tempc = a_s_TAH; 289 break; 290 case a_ZAH: 291 tempc = a_s_ZAH; 292 break; 293 case a_AIN: 294 tempc = a_s_AIN; 295 break; 296 case a_GHAIN: 297 tempc = a_s_GHAIN; 298 break; 299 case a_FEH: 300 tempc = a_s_FEH; 301 break; 302 case a_QAF: 303 tempc = a_s_QAF; 304 break; 305 case a_KAF: 306 tempc = a_s_KAF; 307 break; 308 case a_LAM: 309 tempc = a_s_LAM; 310 break; 311 case a_MEEM: 312 tempc = a_s_MEEM; 313 break; 314 case a_NOON: 315 tempc = a_s_NOON; 316 break; 317 case a_HEH: 318 tempc = a_s_HEH; 319 break; 320 case a_YEH: 321 tempc = a_s_YEH; 322 break; 323 default: 324 tempc = 0; 325 } 326 327 return tempc; 328} 329 330 331/* 332 * Change shape - from ISO-8859-6/Isolated to Initial 333 */ 334 static int 335chg_c_a2i(cur_c) 336 int cur_c; 337{ 338 int tempc; 339 340 switch (cur_c) 341 { 342 case a_YEH_HAMZA: 343 tempc = a_i_YEH_HAMZA; 344 break; 345 case a_HAMZA: /* exceptions */ 346 tempc = a_s_HAMZA; 347 break; 348 case a_ALEF_MADDA: /* exceptions */ 349 tempc = a_s_ALEF_MADDA; 350 break; 351 case a_ALEF_HAMZA_ABOVE: /* exceptions */ 352 tempc = a_s_ALEF_HAMZA_ABOVE; 353 break; 354 case a_WAW_HAMZA: /* exceptions */ 355 tempc = a_s_WAW_HAMZA; 356 break; 357 case a_ALEF_HAMZA_BELOW: /* exceptions */ 358 tempc = a_s_ALEF_HAMZA_BELOW; 359 break; 360 case a_ALEF: /* exceptions */ 361 tempc = a_s_ALEF; 362 break; 363 case a_TEH_MARBUTA: /* exceptions */ 364 tempc = a_s_TEH_MARBUTA; 365 break; 366 case a_DAL: /* exceptions */ 367 tempc = a_s_DAL; 368 break; 369 case a_THAL: /* exceptions */ 370 tempc = a_s_THAL; 371 break; 372 case a_REH: /* exceptions */ 373 tempc = a_s_REH; 374 break; 375 case a_ZAIN: /* exceptions */ 376 tempc = a_s_ZAIN; 377 break; 378 case a_TATWEEL: /* exceptions */ 379 tempc = cur_c; 380 break; 381 case a_WAW: /* exceptions */ 382 tempc = a_s_WAW; 383 break; 384 case a_ALEF_MAKSURA: /* exceptions */ 385 tempc = a_s_ALEF_MAKSURA; 386 break; 387 case a_BEH: 388 tempc = a_i_BEH; 389 break; 390 case a_TEH: 391 tempc = a_i_TEH; 392 break; 393 case a_THEH: 394 tempc = a_i_THEH; 395 break; 396 case a_JEEM: 397 tempc = a_i_JEEM; 398 break; 399 case a_HAH: 400 tempc = a_i_HAH; 401 break; 402 case a_KHAH: 403 tempc = a_i_KHAH; 404 break; 405 case a_SEEN: 406 tempc = a_i_SEEN; 407 break; 408 case a_SHEEN: 409 tempc = a_i_SHEEN; 410 break; 411 case a_SAD: 412 tempc = a_i_SAD; 413 break; 414 case a_DAD: 415 tempc = a_i_DAD; 416 break; 417 case a_TAH: 418 tempc = a_i_TAH; 419 break; 420 case a_ZAH: 421 tempc = a_i_ZAH; 422 break; 423 case a_AIN: 424 tempc = a_i_AIN; 425 break; 426 case a_GHAIN: 427 tempc = a_i_GHAIN; 428 break; 429 case a_FEH: 430 tempc = a_i_FEH; 431 break; 432 case a_QAF: 433 tempc = a_i_QAF; 434 break; 435 case a_KAF: 436 tempc = a_i_KAF; 437 break; 438 case a_LAM: 439 tempc = a_i_LAM; 440 break; 441 case a_MEEM: 442 tempc = a_i_MEEM; 443 break; 444 case a_NOON: 445 tempc = a_i_NOON; 446 break; 447 case a_HEH: 448 tempc = a_i_HEH; 449 break; 450 case a_YEH: 451 tempc = a_i_YEH; 452 break; 453 default: 454 tempc = 0; 455 } 456 457 return tempc; 458} 459 460 461/* 462 * Change shape - from ISO-8859-6/Isolated to Medial 463 */ 464 static int 465chg_c_a2m(cur_c) 466 int cur_c; 467{ 468 int tempc; 469 470 switch (cur_c) 471 { 472 case a_HAMZA: /* exception */ 473 tempc = a_s_HAMZA; 474 break; 475 case a_ALEF_MADDA: /* exception */ 476 tempc = a_f_ALEF_MADDA; 477 break; 478 case a_ALEF_HAMZA_ABOVE: /* exception */ 479 tempc = a_f_ALEF_HAMZA_ABOVE; 480 break; 481 case a_WAW_HAMZA: /* exception */ 482 tempc = a_f_WAW_HAMZA; 483 break; 484 case a_ALEF_HAMZA_BELOW: /* exception */ 485 tempc = a_f_ALEF_HAMZA_BELOW; 486 break; 487 case a_YEH_HAMZA: 488 tempc = a_m_YEH_HAMZA; 489 break; 490 case a_ALEF: /* exception */ 491 tempc = a_f_ALEF; 492 break; 493 case a_BEH: 494 tempc = a_m_BEH; 495 break; 496 case a_TEH_MARBUTA: /* exception */ 497 tempc = a_f_TEH_MARBUTA; 498 break; 499 case a_TEH: 500 tempc = a_m_TEH; 501 break; 502 case a_THEH: 503 tempc = a_m_THEH; 504 break; 505 case a_JEEM: 506 tempc = a_m_JEEM; 507 break; 508 case a_HAH: 509 tempc = a_m_HAH; 510 break; 511 case a_KHAH: 512 tempc = a_m_KHAH; 513 break; 514 case a_DAL: /* exception */ 515 tempc = a_f_DAL; 516 break; 517 case a_THAL: /* exception */ 518 tempc = a_f_THAL; 519 break; 520 case a_REH: /* exception */ 521 tempc = a_f_REH; 522 break; 523 case a_ZAIN: /* exception */ 524 tempc = a_f_ZAIN; 525 break; 526 case a_SEEN: 527 tempc = a_m_SEEN; 528 break; 529 case a_SHEEN: 530 tempc = a_m_SHEEN; 531 break; 532 case a_SAD: 533 tempc = a_m_SAD; 534 break; 535 case a_DAD: 536 tempc = a_m_DAD; 537 break; 538 case a_TAH: 539 tempc = a_m_TAH; 540 break; 541 case a_ZAH: 542 tempc = a_m_ZAH; 543 break; 544 case a_AIN: 545 tempc = a_m_AIN; 546 break; 547 case a_GHAIN: 548 tempc = a_m_GHAIN; 549 break; 550 case a_TATWEEL: /* exception */ 551 tempc = cur_c; 552 break; 553 case a_FEH: 554 tempc = a_m_FEH; 555 break; 556 case a_QAF: 557 tempc = a_m_QAF; 558 break; 559 case a_KAF: 560 tempc = a_m_KAF; 561 break; 562 case a_LAM: 563 tempc = a_m_LAM; 564 break; 565 case a_MEEM: 566 tempc = a_m_MEEM; 567 break; 568 case a_NOON: 569 tempc = a_m_NOON; 570 break; 571 case a_HEH: 572 tempc = a_m_HEH; 573 break; 574 case a_WAW: /* exception */ 575 tempc = a_f_WAW; 576 break; 577 case a_ALEF_MAKSURA: /* exception */ 578 tempc = a_f_ALEF_MAKSURA; 579 break; 580 case a_YEH: 581 tempc = a_m_YEH; 582 break; 583 default: 584 tempc = 0; 585 } 586 587 return tempc; 588} 589 590 591/* 592 * Change shape - from ISO-8859-6/Isolated to final 593 */ 594 static int 595chg_c_a2f(cur_c) 596 int cur_c; 597{ 598 int tempc; 599 600 /* NOTE: these encodings need to be accounted for 601 602 a_f_ALEF_MADDA; 603 a_f_ALEF_HAMZA_ABOVE; 604 a_f_ALEF_HAMZA_BELOW; 605 a_f_LAM_ALEF_MADDA_ABOVE; 606 a_f_LAM_ALEF_HAMZA_ABOVE; 607 a_f_LAM_ALEF_HAMZA_BELOW; 608 */ 609 610 switch (cur_c) 611 { 612 case a_HAMZA: /* exception */ 613 tempc = a_s_HAMZA; 614 break; 615 case a_ALEF_MADDA: 616 tempc = a_f_ALEF_MADDA; 617 break; 618 case a_ALEF_HAMZA_ABOVE: 619 tempc = a_f_ALEF_HAMZA_ABOVE; 620 break; 621 case a_WAW_HAMZA: 622 tempc = a_f_WAW_HAMZA; 623 break; 624 case a_ALEF_HAMZA_BELOW: 625 tempc = a_f_ALEF_HAMZA_BELOW; 626 break; 627 case a_YEH_HAMZA: 628 tempc = a_f_YEH_HAMZA; 629 break; 630 case a_ALEF: 631 tempc = a_f_ALEF; 632 break; 633 case a_BEH: 634 tempc = a_f_BEH; 635 break; 636 case a_TEH_MARBUTA: 637 tempc = a_f_TEH_MARBUTA; 638 break; 639 case a_TEH: 640 tempc = a_f_TEH; 641 break; 642 case a_THEH: 643 tempc = a_f_THEH; 644 break; 645 case a_JEEM: 646 tempc = a_f_JEEM; 647 break; 648 case a_HAH: 649 tempc = a_f_HAH; 650 break; 651 case a_KHAH: 652 tempc = a_f_KHAH; 653 break; 654 case a_DAL: 655 tempc = a_f_DAL; 656 break; 657 case a_THAL: 658 tempc = a_f_THAL; 659 break; 660 case a_REH: 661 tempc = a_f_REH; 662 break; 663 case a_ZAIN: 664 tempc = a_f_ZAIN; 665 break; 666 case a_SEEN: 667 tempc = a_f_SEEN; 668 break; 669 case a_SHEEN: 670 tempc = a_f_SHEEN; 671 break; 672 case a_SAD: 673 tempc = a_f_SAD; 674 break; 675 case a_DAD: 676 tempc = a_f_DAD; 677 break; 678 case a_TAH: 679 tempc = a_f_TAH; 680 break; 681 case a_ZAH: 682 tempc = a_f_ZAH; 683 break; 684 case a_AIN: 685 tempc = a_f_AIN; 686 break; 687 case a_GHAIN: 688 tempc = a_f_GHAIN; 689 break; 690 case a_TATWEEL: /* exception */ 691 tempc = cur_c; 692 break; 693 case a_FEH: 694 tempc = a_f_FEH; 695 break; 696 case a_QAF: 697 tempc = a_f_QAF; 698 break; 699 case a_KAF: 700 tempc = a_f_KAF; 701 break; 702 case a_LAM: 703 tempc = a_f_LAM; 704 break; 705 case a_MEEM: 706 tempc = a_f_MEEM; 707 break; 708 case a_NOON: 709 tempc = a_f_NOON; 710 break; 711 case a_HEH: 712 tempc = a_f_HEH; 713 break; 714 case a_WAW: 715 tempc = a_f_WAW; 716 break; 717 case a_ALEF_MAKSURA: 718 tempc = a_f_ALEF_MAKSURA; 719 break; 720 case a_YEH: 721 tempc = a_f_YEH; 722 break; 723 default: 724 tempc = 0; 725 } 726 727 return tempc; 728} 729 730 731/* 732 * Change shape - from Initial to Medial 733 */ 734 static int 735chg_c_i2m(cur_c) 736 int cur_c; 737{ 738 int tempc; 739 740 switch (cur_c) 741 { 742 case a_i_YEH_HAMZA: 743 tempc = a_m_YEH_HAMZA; 744 break; 745 case a_i_BEH: 746 tempc = a_m_BEH; 747 break; 748 case a_i_TEH: 749 tempc = a_m_TEH; 750 break; 751 case a_i_THEH: 752 tempc = a_m_THEH; 753 break; 754 case a_i_JEEM: 755 tempc = a_m_JEEM; 756 break; 757 case a_i_HAH: 758 tempc = a_m_HAH; 759 break; 760 case a_i_KHAH: 761 tempc = a_m_KHAH; 762 break; 763 case a_i_SEEN: 764 tempc = a_m_SEEN; 765 break; 766 case a_i_SHEEN: 767 tempc = a_m_SHEEN; 768 break; 769 case a_i_SAD: 770 tempc = a_m_SAD; 771 break; 772 case a_i_DAD: 773 tempc = a_m_DAD; 774 break; 775 case a_i_TAH: 776 tempc = a_m_TAH; 777 break; 778 case a_i_ZAH: 779 tempc = a_m_ZAH; 780 break; 781 case a_i_AIN: 782 tempc = a_m_AIN; 783 break; 784 case a_i_GHAIN: 785 tempc = a_m_GHAIN; 786 break; 787 case a_i_FEH: 788 tempc = a_m_FEH; 789 break; 790 case a_i_QAF: 791 tempc = a_m_QAF; 792 break; 793 case a_i_KAF: 794 tempc = a_m_KAF; 795 break; 796 case a_i_LAM: 797 tempc = a_m_LAM; 798 break; 799 case a_i_MEEM: 800 tempc = a_m_MEEM; 801 break; 802 case a_i_NOON: 803 tempc = a_m_NOON; 804 break; 805 case a_i_HEH: 806 tempc = a_m_HEH; 807 break; 808 case a_i_YEH: 809 tempc = a_m_YEH; 810 break; 811 default: 812 tempc = 0; 813 } 814 815 return tempc; 816} 817 818 819/* 820 * Change shape - from Final to Medial 821 */ 822 static int 823chg_c_f2m(cur_c) 824 int cur_c; 825{ 826 int tempc; 827 828 switch (cur_c) 829 { 830 /* NOTE: these encodings are multi-positional, no ? 831 case a_f_ALEF_MADDA: 832 case a_f_ALEF_HAMZA_ABOVE: 833 case a_f_ALEF_HAMZA_BELOW: 834 */ 835 case a_f_YEH_HAMZA: 836 tempc = a_m_YEH_HAMZA; 837 break; 838 case a_f_WAW_HAMZA: /* exceptions */ 839 case a_f_ALEF: 840 case a_f_TEH_MARBUTA: 841 case a_f_DAL: 842 case a_f_THAL: 843 case a_f_REH: 844 case a_f_ZAIN: 845 case a_f_WAW: 846 case a_f_ALEF_MAKSURA: 847 tempc = cur_c; 848 break; 849 case a_f_BEH: 850 tempc = a_m_BEH; 851 break; 852 case a_f_TEH: 853 tempc = a_m_TEH; 854 break; 855 case a_f_THEH: 856 tempc = a_m_THEH; 857 break; 858 case a_f_JEEM: 859 tempc = a_m_JEEM; 860 break; 861 case a_f_HAH: 862 tempc = a_m_HAH; 863 break; 864 case a_f_KHAH: 865 tempc = a_m_KHAH; 866 break; 867 case a_f_SEEN: 868 tempc = a_m_SEEN; 869 break; 870 case a_f_SHEEN: 871 tempc = a_m_SHEEN; 872 break; 873 case a_f_SAD: 874 tempc = a_m_SAD; 875 break; 876 case a_f_DAD: 877 tempc = a_m_DAD; 878 break; 879 case a_f_TAH: 880 tempc = a_m_TAH; 881 break; 882 case a_f_ZAH: 883 tempc = a_m_ZAH; 884 break; 885 case a_f_AIN: 886 tempc = a_m_AIN; 887 break; 888 case a_f_GHAIN: 889 tempc = a_m_GHAIN; 890 break; 891 case a_f_FEH: 892 tempc = a_m_FEH; 893 break; 894 case a_f_QAF: 895 tempc = a_m_QAF; 896 break; 897 case a_f_KAF: 898 tempc = a_m_KAF; 899 break; 900 case a_f_LAM: 901 tempc = a_m_LAM; 902 break; 903 case a_f_MEEM: 904 tempc = a_m_MEEM; 905 break; 906 case a_f_NOON: 907 tempc = a_m_NOON; 908 break; 909 case a_f_HEH: 910 tempc = a_m_HEH; 911 break; 912 case a_f_YEH: 913 tempc = a_m_YEH; 914 break; 915 /* NOTE: these encodings are multi-positional, no ? 916 case a_f_LAM_ALEF_MADDA_ABOVE: 917 case a_f_LAM_ALEF_HAMZA_ABOVE: 918 case a_f_LAM_ALEF_HAMZA_BELOW: 919 case a_f_LAM_ALEF: 920 */ 921 default: 922 tempc = 0; 923 } 924 925 return tempc; 926} 927 928 929/* 930 * Change shape - from Combination (2 char) to an Isolated 931 */ 932 static int 933chg_c_laa2i(hid_c) 934 int hid_c; 935{ 936 int tempc; 937 938 switch (hid_c) 939 { 940 case a_ALEF_MADDA: 941 tempc = a_s_LAM_ALEF_MADDA_ABOVE; 942 break; 943 case a_ALEF_HAMZA_ABOVE: 944 tempc = a_s_LAM_ALEF_HAMZA_ABOVE; 945 break; 946 case a_ALEF_HAMZA_BELOW: 947 tempc = a_s_LAM_ALEF_HAMZA_BELOW; 948 break; 949 case a_ALEF: 950 tempc = a_s_LAM_ALEF; 951 break; 952 default: 953 tempc = 0; 954 } 955 956 return tempc; 957} 958 959 960/* 961 * Change shape - from Combination-Isolated to Final 962 */ 963 static int 964chg_c_laa2f(hid_c) 965 int hid_c; 966{ 967 int tempc; 968 969 switch (hid_c) 970 { 971 case a_ALEF_MADDA: 972 tempc = a_f_LAM_ALEF_MADDA_ABOVE; 973 break; 974 case a_ALEF_HAMZA_ABOVE: 975 tempc = a_f_LAM_ALEF_HAMZA_ABOVE; 976 break; 977 case a_ALEF_HAMZA_BELOW: 978 tempc = a_f_LAM_ALEF_HAMZA_BELOW; 979 break; 980 case a_ALEF: 981 tempc = a_f_LAM_ALEF; 982 break; 983 default: 984 tempc = 0; 985 } 986 987 return tempc; 988} 989 990/* 991 * Do "half-shaping" on character "c". Return zero if no shaping. 992 */ 993 static int 994half_shape(c) 995 int c; 996{ 997 if (A_is_a(c)) 998 return chg_c_a2i(c); 999 if (A_is_valid(c) && A_is_f(c)) 1000 return chg_c_f2m(c); 1001 return 0; 1002} 1003 1004/* 1005 * Do Arabic shaping on character "c". Returns the shaped character. 1006 * out: "ccp" points to the first byte of the character to be shaped. 1007 * in/out: "c1p" points to the first composing char for "c". 1008 * in: "prev_c" is the previous character (not shaped) 1009 * in: "prev_c1" is the first composing char for the previous char 1010 * (not shaped) 1011 * in: "next_c" is the next character (not shaped). 1012 */ 1013 int 1014arabic_shape(c, ccp, c1p, prev_c, prev_c1, next_c) 1015 int c; 1016 int *ccp; 1017 int *c1p; 1018 int prev_c; 1019 int prev_c1; 1020 int next_c; 1021{ 1022 int curr_c; 1023 int shape_c; 1024 int curr_laa; 1025 int prev_laa; 1026 1027 /* Deal only with Arabic character, pass back all others */ 1028 if (!A_is_ok(c)) 1029 return c; 1030 1031 /* half-shape current and previous character */ 1032 shape_c = half_shape(prev_c); 1033 1034 /* Save away current character */ 1035 curr_c = c; 1036 1037 curr_laa = A_firstc_laa(c, *c1p); 1038 prev_laa = A_firstc_laa(prev_c, prev_c1); 1039 1040 if (curr_laa) 1041 { 1042 if (A_is_valid(prev_c) && !A_is_f(shape_c) 1043 && !A_is_s(shape_c) && !prev_laa) 1044 curr_c = chg_c_laa2f(curr_laa); 1045 else 1046 curr_c = chg_c_laa2i(curr_laa); 1047 1048 /* Remove the composing character */ 1049 *c1p = 0; 1050 } 1051 else if (!A_is_valid(prev_c) && A_is_valid(next_c)) 1052 curr_c = chg_c_a2i(c); 1053 else if (!shape_c || A_is_f(shape_c) || A_is_s(shape_c) || prev_laa) 1054 curr_c = A_is_valid(next_c) ? chg_c_a2i(c) : chg_c_a2s(c); 1055 else if (A_is_valid(next_c)) 1056 curr_c = A_is_iso(c) ? chg_c_a2m(c) : chg_c_i2m(c); 1057 else if (A_is_valid(prev_c)) 1058 curr_c = chg_c_a2f(c); 1059 else 1060 curr_c = chg_c_a2s(c); 1061 1062 /* Sanity check -- curr_c should, in the future, never be 0. 1063 * We should, in the future, insert a fatal error here. */ 1064 if (curr_c == NUL) 1065 curr_c = c; 1066 1067 if (curr_c != c && ccp != NULL) 1068 { 1069 char_u buf[MB_MAXBYTES]; 1070 1071 /* Update the first byte of the character. */ 1072 (*mb_char2bytes)(curr_c, buf); 1073 *ccp = buf[0]; 1074 } 1075 1076 /* Return the shaped character */ 1077 return curr_c; 1078} 1079 1080 1081/* 1082 * A_firstc_laa returns first character of LAA combination if it exists 1083 */ 1084 static int 1085A_firstc_laa(c, c1) 1086 int c; /* base character */ 1087 int c1; /* first composing character */ 1088{ 1089 if (c1 != NUL && c == a_LAM && !A_is_harakat(c1)) 1090 return c1; 1091 return 0; 1092} 1093 1094 1095/* 1096 * A_is_harakat returns TRUE if 'c' is an Arabic Harakat character 1097 * (harakat/tanween) 1098 */ 1099 static int 1100A_is_harakat(c) 1101 int c; 1102{ 1103 return (c >= a_FATHATAN && c <= a_SUKUN); 1104} 1105 1106 1107/* 1108 * A_is_iso returns TRUE if 'c' is an Arabic ISO-8859-6 character 1109 * (alphabet/number/punctuation) 1110 */ 1111 static int 1112A_is_iso(c) 1113 int c; 1114{ 1115 return ((c >= a_HAMZA && c <= a_GHAIN) 1116 || (c >= a_TATWEEL && c <= a_HAMZA_BELOW) 1117 || c == a_MINI_ALEF); 1118} 1119 1120 1121/* 1122 * A_is_formb returns TRUE if 'c' is an Arabic 10646-1 FormB character 1123 * (alphabet/number/punctuation) 1124 */ 1125 static int 1126A_is_formb(c) 1127 int c; 1128{ 1129 return ((c >= a_s_FATHATAN && c <= a_s_DAMMATAN) 1130 || c == a_s_KASRATAN 1131 || (c >= a_s_FATHA && c <= a_f_LAM_ALEF) 1132 || c == a_BYTE_ORDER_MARK); 1133} 1134 1135 1136/* 1137 * A_is_ok returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B) 1138 */ 1139 static int 1140A_is_ok(c) 1141 int c; 1142{ 1143 return (A_is_iso(c) || A_is_formb(c)); 1144} 1145 1146 1147/* 1148 * A_is_valid returns TRUE if 'c' is an Arabic 10646 (8859-6 or Form-B) 1149 * with some exceptions/exclusions 1150 */ 1151 static int 1152A_is_valid(c) 1153 int c; 1154{ 1155 return (A_is_ok(c) && !A_is_special(c)); 1156} 1157 1158 1159/* 1160 * A_is_special returns TRUE if 'c' is not a special Arabic character. 1161 * Specials don't adhere to most of the rules. 1162 */ 1163 static int 1164A_is_special(c) 1165 int c; 1166{ 1167 return (c == a_HAMZA || c == a_s_HAMZA); 1168} 1169