1/* $OpenLDAP$ */ 2/* This work is part of OpenLDAP Software <http://www.openldap.org/>. 3 * 4 * Copyright 1998-2011 The OpenLDAP Foundation. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted only as authorized by the OpenLDAP 9 * Public License. 10 * 11 * A copy of this license is available in file LICENSE in the 12 * top-level directory of the distribution or, alternatively, at 13 * <http://www.OpenLDAP.org/license.html>. 14 */ 15/* Copyright 2001 Computing Research Labs, New Mexico State University 16 * 17 * Permission is hereby granted, free of charge, to any person obtaining a 18 * copy of this software and associated documentation files (the "Software"), 19 * to deal in the Software without restriction, including without limitation 20 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 21 * and/or sell copies of the Software, and to permit persons to whom the 22 * Software is furnished to do so, subject to the following conditions: 23 * 24 * The above copyright notice and this permission notice shall be included in 25 * all copies or substantial portions of the Software. 26 * 27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 28 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 29 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 30 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY 31 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT 32 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR 33 * THE USE OR OTHER DEALINGS IN THE SOFTWARE. 34 */ 35/* $Id: ucpgba.c,v 1.5 2001/01/02 18:46:20 mleisher Exp $ */ 36 37#include "portable.h" 38 39#include <stdio.h> 40#include <stdlib.h> 41 42#include "ucdata.h" 43#include "ucpgba.h" 44 45/* 46 * These macros are used while reordering of RTL runs of text for the 47 * special case of non-spacing characters being in runs of weakly 48 * directional text. They check for weak and non-spacing, and digits and 49 * non-spacing. 50 */ 51#define ISWEAKSPECIAL(cc) ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS) 52#define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0) 53 54/* 55 * These macros are used while breaking a string into runs of text in 56 * different directions. Descriptions: 57 * 58 * ISLTR_LTR - Test for members of an LTR run in an LTR context. This looks 59 * for characters with ltr, non-spacing, weak, and neutral 60 * properties. 61 * 62 * ISRTL_RTL - Test for members of an RTL run in an RTL context. This looks 63 * for characters with rtl, non-spacing, weak, and neutral 64 * properties. 65 * 66 * ISRTL_NEUTRAL - Test for RTL or neutral characters. 67 * 68 * ISWEAK_NEUTRAL - Test for weak or neutral characters. 69 */ 70#define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\ 71 UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON) 72 73#define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\ 74 UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON) 75 76#define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON) 77#define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \ 78 UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS) 79 80/* 81 * This table is temporarily hard-coded here until it can be constructed 82 * automatically somehow. 83 */ 84static unsigned long _symmetric_pairs[] = { 85 0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C, 86 0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B, 87 0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D, 88 0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008, 89 0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C, 90 0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010, 91 0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016, 92 0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A, 93 0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59, 94 0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D, 95 0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B, 96 0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62, 97}; 98 99static int _symmetric_pairs_size = 100sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]); 101 102/* 103 * This routine looks up the other form of a symmetric pair. 104 */ 105static unsigned long 106_ucsymmetric_pair(unsigned long c) 107{ 108 int i; 109 110 for (i = 0; i < _symmetric_pairs_size; i += 2) { 111 if (_symmetric_pairs[i] == c) 112 return _symmetric_pairs[i+1]; 113 } 114 return c; 115} 116 117/* 118 * This routine creates a new run, copies the text into it, links it into the 119 * logical text order chain and returns it to the caller to be linked into 120 * the visual text order chain. 121 */ 122static ucrun_t * 123_add_run(ucstring_t *str, unsigned long *src, 124 unsigned long start, unsigned long end, int direction) 125{ 126 long i, t; 127 ucrun_t *run; 128 129 run = (ucrun_t *) malloc(sizeof(ucrun_t)); 130 run->visual_next = run->visual_prev = 0; 131 run->direction = direction; 132 133 run->cursor = ~0; 134 135 run->chars = (unsigned long *) 136 malloc(sizeof(unsigned long) * ((end - start) << 1)); 137 run->positions = run->chars + (end - start); 138 139 run->source = src; 140 run->start = start; 141 run->end = end; 142 143 if (direction == UCPGBA_RTL) { 144 /* 145 * Copy the source text into the run in reverse order and select 146 * replacements for the pairwise punctuation and the <> characters. 147 */ 148 for (i = 0, t = end - 1; start < end; start++, t--, i++) { 149 run->positions[i] = t; 150 if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>') 151 run->chars[i] = _ucsymmetric_pair(src[t]); 152 else 153 run->chars[i] = src[t]; 154 } 155 } else { 156 /* 157 * Copy the source text into the run directly. 158 */ 159 for (i = start; i < end; i++) { 160 run->positions[i - start] = i; 161 run->chars[i - start] = src[i]; 162 } 163 } 164 165 /* 166 * Add the run to the logical list for cursor traversal. 167 */ 168 if (str->logical_first == 0) 169 str->logical_first = str->logical_last = run; 170 else { 171 run->logical_prev = str->logical_last; 172 str->logical_last->logical_next = run; 173 str->logical_last = run; 174 } 175 176 return run; 177} 178 179static void 180_ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start, 181 unsigned long end) 182{ 183 unsigned long s, e; 184 ucrun_t *run, *lrun; 185 186 /* 187 * This is used to splice runs into strings with overall LTR direction. 188 * The `lrun' variable will never be NULL because at least one LTR run was 189 * added before this RTL run. 190 */ 191 lrun = str->visual_last; 192 193 for (e = s = start; s < end;) { 194 for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ; 195 196 if (e > s) { 197 run = _add_run(str, source, s, e, UCPGBA_RTL); 198 199 /* 200 * Add the run to the visual list for cursor traversal. 201 */ 202 if (str->visual_first != 0) { 203 if (str->direction == UCPGBA_LTR) { 204 run->visual_prev = lrun; 205 run->visual_next = lrun->visual_next; 206 if (lrun->visual_next != 0) 207 lrun->visual_next->visual_prev = run; 208 lrun->visual_next = run; 209 if (lrun == str->visual_last) 210 str->visual_last = run; 211 } else { 212 run->visual_next = str->visual_first; 213 str->visual_first->visual_prev = run; 214 str->visual_first = run; 215 } 216 } else 217 str->visual_first = str->visual_last = run; 218 } 219 220 /* 221 * Handle digits in a special way. This makes sure the weakly 222 * directional characters appear on the expected sides of a number 223 * depending on whether that number is Arabic or not. 224 */ 225 for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) { 226 if (!ISDIGITSPECIAL(source[e]) && 227 (e + 1 == end || !ISDIGITSPECIAL(source[e + 1]))) 228 break; 229 } 230 231 if (e > s) { 232 run = _add_run(str, source, s, e, UCPGBA_LTR); 233 234 /* 235 * Add the run to the visual list for cursor traversal. 236 */ 237 if (str->visual_first != 0) { 238 if (str->direction == UCPGBA_LTR) { 239 run->visual_prev = lrun; 240 run->visual_next = lrun->visual_next; 241 if (lrun->visual_next != 0) 242 lrun->visual_next->visual_prev = run; 243 lrun->visual_next = run; 244 if (lrun == str->visual_last) 245 str->visual_last = run; 246 } else { 247 run->visual_next = str->visual_first; 248 str->visual_first->visual_prev = run; 249 str->visual_first = run; 250 } 251 } else 252 str->visual_first = str->visual_last = run; 253 } 254 255 /* 256 * Collect all weak non-digit sequences for an RTL segment. These 257 * will appear as part of the next RTL segment or will be added as 258 * an RTL segment by themselves. 259 */ 260 for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]); 261 e++) ; 262 } 263 264 /* 265 * Capture any weak non-digit sequences that occur at the end of the RTL 266 * run. 267 */ 268 if (e > s) { 269 run = _add_run(str, source, s, e, UCPGBA_RTL); 270 271 /* 272 * Add the run to the visual list for cursor traversal. 273 */ 274 if (str->visual_first != 0) { 275 if (str->direction == UCPGBA_LTR) { 276 run->visual_prev = lrun; 277 run->visual_next = lrun->visual_next; 278 if (lrun->visual_next != 0) 279 lrun->visual_next->visual_prev = run; 280 lrun->visual_next = run; 281 if (lrun == str->visual_last) 282 str->visual_last = run; 283 } else { 284 run->visual_next = str->visual_first; 285 str->visual_first->visual_prev = run; 286 str->visual_first = run; 287 } 288 } else 289 str->visual_first = str->visual_last = run; 290 } 291} 292 293static void 294_ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start, 295 unsigned long end) 296{ 297 ucrun_t *run; 298 299 run = _add_run(str, source, start, end, UCPGBA_LTR); 300 301 /* 302 * Add the run to the visual list for cursor traversal. 303 */ 304 if (str->visual_first != 0) { 305 if (str->direction == UCPGBA_LTR) { 306 run->visual_prev = str->visual_last; 307 str->visual_last->visual_next = run; 308 str->visual_last = run; 309 } else { 310 run->visual_next = str->visual_first; 311 str->visual_first->visual_prev = run; 312 str->visual_first = run; 313 } 314 } else 315 str->visual_first = str->visual_last = run; 316} 317 318ucstring_t * 319ucstring_create(unsigned long *source, unsigned long start, unsigned long end, 320 int default_direction, int cursor_motion) 321{ 322 int rtl_first; 323 unsigned long s, e, ld; 324 ucstring_t *str; 325 326 str = (ucstring_t *) malloc(sizeof(ucstring_t)); 327 328 /* 329 * Set the initial values. 330 */ 331 str->cursor_motion = cursor_motion; 332 str->logical_first = str->logical_last = 0; 333 str->visual_first = str->visual_last = str->cursor = 0; 334 str->source = source; 335 str->start = start; 336 str->end = end; 337 338 /* 339 * If the length of the string is 0, then just return it at this point. 340 */ 341 if (start == end) 342 return str; 343 344 /* 345 * This flag indicates whether the collection loop for RTL is called 346 * before the LTR loop the first time. 347 */ 348 rtl_first = 0; 349 350 /* 351 * Look for the first character in the string that has strong 352 * directionality. 353 */ 354 for (s = start; s < end && !ucisstrong(source[s]); s++) ; 355 356 if (s == end) 357 /* 358 * If the string contains no characters with strong directionality, use 359 * the default direction. 360 */ 361 str->direction = default_direction; 362 else 363 str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR; 364 365 if (str->direction == UCPGBA_RTL) 366 /* 367 * Set the flag that causes the RTL collection loop to run first. 368 */ 369 rtl_first = 1; 370 371 /* 372 * This loop now separates the string into runs based on directionality. 373 */ 374 for (s = e = 0; s < end; s = e) { 375 if (!rtl_first) { 376 /* 377 * Determine the next run of LTR text. 378 */ 379 380 ld = s; 381 while (e < end && ISLTR_LTR(source[e])) { 382 if (ucisdigit(source[e]) && 383 !(0x660 <= source[e] && source[e] <= 0x669)) 384 ld = e; 385 e++; 386 } 387 if (str->direction != UCPGBA_LTR) { 388 while (e > ld && ISWEAK_NEUTRAL(source[e - 1])) 389 e--; 390 } 391 392 /* 393 * Add the LTR segment to the string. 394 */ 395 if (e > s) 396 _ucadd_ltr_segment(str, source, s, e); 397 } 398 399 /* 400 * Determine the next run of RTL text. 401 */ 402 ld = s = e; 403 while (e < end && ISRTL_RTL(source[e])) { 404 if (ucisdigit(source[e]) && 405 !(0x660 <= source[e] && source[e] <= 0x669)) 406 ld = e; 407 e++; 408 } 409 if (str->direction != UCPGBA_RTL) { 410 while (e > ld && ISWEAK_NEUTRAL(source[e - 1])) 411 e--; 412 } 413 414 /* 415 * Add the RTL segment to the string. 416 */ 417 if (e > s) 418 _ucadd_rtl_segment(str, source, s, e); 419 420 /* 421 * Clear the flag that allowed the RTL collection loop to run first 422 * for strings with overall RTL directionality. 423 */ 424 rtl_first = 0; 425 } 426 427 /* 428 * Set up the initial cursor run. 429 */ 430 str->cursor = str->logical_first; 431 if (str != 0) 432 str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ? 433 str->cursor->end - str->cursor->start : 0; 434 435 return str; 436} 437 438void 439ucstring_free(ucstring_t *s) 440{ 441 ucrun_t *l, *r; 442 443 if (s == 0) 444 return; 445 446 for (l = 0, r = s->visual_first; r != 0; r = r->visual_next) { 447 if (r->end > r->start) 448 free((char *) r->chars); 449 if (l) 450 free((char *) l); 451 l = r; 452 } 453 if (l) 454 free((char *) l); 455 456 free((char *) s); 457} 458 459int 460ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion) 461{ 462 int n; 463 464 if (str == 0) 465 return -1; 466 467 n = str->cursor_motion; 468 str->cursor_motion = cursor_motion; 469 return n; 470} 471 472static int 473_ucstring_visual_cursor_right(ucstring_t *str, int count) 474{ 475 int cnt = count; 476 unsigned long size; 477 ucrun_t *cursor; 478 479 if (str == 0) 480 return 0; 481 482 cursor = str->cursor; 483 while (cnt > 0) { 484 size = cursor->end - cursor->start; 485 if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) || 486 cursor->cursor + 1 > size) { 487 /* 488 * If the next run is NULL, then the cursor is already on the 489 * far right end already. 490 */ 491 if (cursor->visual_next == 0) 492 /* 493 * If movement occured, then report it. 494 */ 495 return (cnt != count); 496 497 /* 498 * Move to the next run. 499 */ 500 str->cursor = cursor = cursor->visual_next; 501 cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0; 502 size = cursor->end - cursor->start; 503 } else 504 cursor->cursor++; 505 cnt--; 506 } 507 return 1; 508} 509 510static int 511_ucstring_logical_cursor_right(ucstring_t *str, int count) 512{ 513 int cnt = count; 514 unsigned long size; 515 ucrun_t *cursor; 516 517 if (str == 0) 518 return 0; 519 520 cursor = str->cursor; 521 while (cnt > 0) { 522 size = cursor->end - cursor->start; 523 if (str->direction == UCPGBA_RTL) { 524 if (cursor->direction == UCPGBA_RTL) { 525 if (cursor->cursor + 1 == size) { 526 if (cursor == str->logical_first) 527 /* 528 * Already at the beginning of the string. 529 */ 530 return (cnt != count); 531 532 str->cursor = cursor = cursor->logical_prev; 533 size = cursor->end - cursor->start; 534 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 535 size : 0; 536 } else 537 cursor->cursor++; 538 } else { 539 if (cursor->cursor == 0) { 540 if (cursor == str->logical_first) 541 /* 542 * At the beginning of the string already. 543 */ 544 return (cnt != count); 545 546 str->cursor = cursor = cursor->logical_prev; 547 size = cursor->end - cursor->start; 548 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 549 size : 0; 550 } else 551 cursor->cursor--; 552 } 553 } else { 554 if (cursor->direction == UCPGBA_RTL) { 555 if (cursor->cursor == 0) { 556 if (cursor == str->logical_last) 557 /* 558 * Already at the end of the string. 559 */ 560 return (cnt != count); 561 562 str->cursor = cursor = cursor->logical_next; 563 size = cursor->end - cursor->start; 564 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 565 0 : size - 1; 566 } else 567 cursor->cursor--; 568 } else { 569 if (cursor->cursor + 1 > size) { 570 if (cursor == str->logical_last) 571 /* 572 * Already at the end of the string. 573 */ 574 return (cnt != count); 575 576 str->cursor = cursor = cursor->logical_next; 577 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 578 0 : size - 1; 579 } else 580 cursor->cursor++; 581 } 582 } 583 cnt--; 584 } 585 return 1; 586} 587 588int 589ucstring_cursor_right(ucstring_t *str, int count) 590{ 591 if (str == 0) 592 return 0; 593 return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ? 594 _ucstring_visual_cursor_right(str, count) : 595 _ucstring_logical_cursor_right(str, count); 596} 597 598static int 599_ucstring_visual_cursor_left(ucstring_t *str, int count) 600{ 601 int cnt = count; 602 unsigned long size; 603 ucrun_t *cursor; 604 605 if (str == 0) 606 return 0; 607 608 cursor = str->cursor; 609 while (cnt > 0) { 610 size = cursor->end - cursor->start; 611 if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) || 612 cursor->cursor - 1 < -1) { 613 /* 614 * If the preceding run is NULL, then the cursor is already on the 615 * far left end already. 616 */ 617 if (cursor->visual_prev == 0) 618 /* 619 * If movement occured, then report it. 620 */ 621 return (cnt != count); 622 623 /* 624 * Move to the previous run. 625 */ 626 str->cursor = cursor = cursor->visual_prev; 627 size = cursor->end - cursor->start; 628 cursor->cursor = (cursor->direction == UCPGBA_RTL) ? 629 size : size - 1; 630 } else 631 cursor->cursor--; 632 cnt--; 633 } 634 return 1; 635} 636 637static int 638_ucstring_logical_cursor_left(ucstring_t *str, int count) 639{ 640 int cnt = count; 641 unsigned long size; 642 ucrun_t *cursor; 643 644 if (str == 0) 645 return 0; 646 647 cursor = str->cursor; 648 while (cnt > 0) { 649 size = cursor->end - cursor->start; 650 if (str->direction == UCPGBA_RTL) { 651 if (cursor->direction == UCPGBA_RTL) { 652 if (cursor->cursor == -1) { 653 if (cursor == str->logical_last) 654 /* 655 * Already at the end of the string. 656 */ 657 return (cnt != count); 658 659 str->cursor = cursor = cursor->logical_next; 660 size = cursor->end - cursor->start; 661 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 662 0 : size - 1; 663 } else 664 cursor->cursor--; 665 } else { 666 if (cursor->cursor + 1 > size) { 667 if (cursor == str->logical_last) 668 /* 669 * At the end of the string already. 670 */ 671 return (cnt != count); 672 673 str->cursor = cursor = cursor->logical_next; 674 size = cursor->end - cursor->start; 675 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 676 0 : size - 1; 677 } else 678 cursor->cursor++; 679 } 680 } else { 681 if (cursor->direction == UCPGBA_RTL) { 682 if (cursor->cursor + 1 == size) { 683 if (cursor == str->logical_first) 684 /* 685 * Already at the beginning of the string. 686 */ 687 return (cnt != count); 688 689 str->cursor = cursor = cursor->logical_prev; 690 size = cursor->end - cursor->start; 691 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 692 size : 0; 693 } else 694 cursor->cursor++; 695 } else { 696 if (cursor->cursor == 0) { 697 if (cursor == str->logical_first) 698 /* 699 * Already at the beginning of the string. 700 */ 701 return (cnt != count); 702 703 str->cursor = cursor = cursor->logical_prev; 704 cursor->cursor = (cursor->direction == UCPGBA_LTR) ? 705 size : 0; 706 } else 707 cursor->cursor--; 708 } 709 } 710 cnt--; 711 } 712 return 1; 713} 714 715int 716ucstring_cursor_left(ucstring_t *str, int count) 717{ 718 if (str == 0) 719 return 0; 720 return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ? 721 _ucstring_visual_cursor_left(str, count) : 722 _ucstring_logical_cursor_left(str, count); 723} 724 725void 726ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position) 727{ 728 long c; 729 unsigned long size; 730 ucrun_t *cursor; 731 732 if (str == 0 || direction == 0 || position == 0) 733 return; 734 735 cursor = str->cursor; 736 737 *direction = cursor->direction; 738 739 c = cursor->cursor; 740 size = cursor->end - cursor->start; 741 742 if (c == size) 743 *position = (cursor->direction == UCPGBA_RTL) ? 744 cursor->start : cursor->positions[c - 1]; 745 else if (c == -1) 746 *position = (cursor->direction == UCPGBA_RTL) ? 747 cursor->end : cursor->start; 748 else 749 *position = cursor->positions[c]; 750} 751