1#!/usr/bin/env tclsh 2 3# gen_unicode_data.tcl -- 4# 5# This program parses the UnicodeData files and generates the 6# corresponding unicode_data.tcl file with compressed character 7# data tables. The input to this program should be 8# UnicodeData.txt and CompositionExclusions.txt files 9# from: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt 10# and ftp://ftp.unicode.org/Public/UNIDATA/CompositionExclusions.txt 11# 12# Copyright (c) 1998-1999 by Scriptics Corporation. 13# All rights reserved. 14# 15# Modified for ejabberd by Alexey Shchepin 16# Modified for Tcl stringprep by Sergei Golovan 17# 18# Usage: gen_unicode_data.tcl infile1 infile2 outdir 19# 20# RCS: @(#) $Id: gen_unicode_data.tcl,v 1.1 2008/01/29 02:18:10 patthoyts Exp $ 21 22 23namespace eval uni { 24 set cclass_shift 2 25 set decomp_shift 3 26 set comp_shift 1 27 set shift 5; # number of bits of data within a page 28 # This value can be adjusted to find the 29 # best split to minimize table size 30 31 variable pMap; # map from page to page index, each entry is 32 # an index into the pages table, indexed by 33 # page number 34 variable pages; # map from page index to page info, each 35 # entry is a list of indices into the groups 36 # table, the list is indexed by the offset 37 variable groups; # list of character info values, indexed by 38 # group number, initialized with the 39 # unassigned character group 40 41 variable categories { 42 Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp 43 Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So 44 }; # Ordered list of character categories, must 45 # match the enumeration in the header file. 46 47 variable titleCount 0; # Count of the number of title case 48 # characters. This value is used in the 49 # regular expression code to allocate enough 50 # space for the title case variants. 51} 52 53proc uni::getValue {items index} { 54 variable categories 55 variable titleCount 56 57 # Extract character info 58 59 set category [lindex $items 2] 60 if {[scan [lindex $items 12] %4x toupper] == 1} { 61 set toupper [expr {$index - $toupper}] 62 } else { 63 set toupper {} 64 } 65 if {[scan [lindex $items 13] %4x tolower] == 1} { 66 set tolower [expr {$tolower - $index}] 67 } else { 68 set tolower {} 69 } 70 if {[scan [lindex $items 14] %4x totitle] == 1} { 71 set totitle [expr {$index - $totitle}] 72 } else { 73 set totitle {} 74 } 75 76 set categoryIndex [lsearch -exact $categories $category] 77 if {$categoryIndex < 0} { 78 puts "Unexpected character category: $index($category)" 79 set categoryIndex 0 80 } elseif {$category == "Lt"} { 81 incr titleCount 82 } 83 84 return "$categoryIndex,$toupper,$tolower,$totitle" 85} 86 87proc uni::getGroup {value} { 88 variable groups 89 90 set gIndex [lsearch -exact $groups $value] 91 if {$gIndex == -1} { 92 set gIndex [llength $groups] 93 lappend groups $value 94 } 95 return $gIndex 96} 97 98proc uni::addPage {info} { 99 variable pMap 100 variable pages 101 102 set pIndex [lsearch -exact $pages $info] 103 if {$pIndex == -1} { 104 set pIndex [llength $pages] 105 lappend pages $info 106 } 107 lappend pMap $pIndex 108 return 109} 110 111proc uni::addPage {map_var pages_var info} { 112 variable $map_var 113 variable $pages_var 114 115 set pIndex [lsearch -exact [set $pages_var] $info] 116 if {$pIndex == -1} { 117 set pIndex [llength [set $pages_var]] 118 lappend $pages_var $info 119 } 120 lappend $map_var $pIndex 121 return 122} 123 124proc uni::load_exclusions {data} { 125 variable exclusions 126 127 foreach line [split $data \n] { 128 if {$line == ""} continue 129 130 set items [split $line " "] 131 132 if {[lindex $items 0] == "#"} continue 133 134 scan [lindex $items 0] %x index 135 136 set exclusions($index) "" 137 } 138} 139 140proc uni::load_tables {data} { 141 variable cclass_map 142 variable decomp_map 143 variable decomp_compat 144 variable comp_map 145 variable comp_first 146 variable comp_second 147 variable exclusions 148 149 foreach line [split $data \n] { 150 if {$line == ""} continue 151 152 set items [split $line \;] 153 154 scan [lindex $items 0] %x index 155 set cclass [lindex $items 3] 156 set decomp [lindex $items 5] 157 158 set cclass_map($index) $cclass 159 #set decomp_map($index) $cclass 160 161 if {$decomp != ""} { 162 set decomp_compat($index) 0 163 if {[string index [lindex $decomp 0] 0] == "<"} { 164 set decomp_compat($index) 1 165 set decomp1 [lreplace $decomp 0 0] 166 set decomp {} 167 foreach ch $decomp1 { 168 scan $ch %x ch 169 lappend decomp $ch 170 } 171 set decomp_map($index) $decomp 172 } else { 173 switch -- [llength $decomp] { 174 1 { 175 scan $decomp %x ch 176 set decomp_map($index) $ch 177 } 178 2 { 179 scan $decomp "%x %x" ch1 ch2 180 set decomp [list $ch1 $ch2] 181 set decomp_map($index) $decomp 182 # hackish 183 if {(![info exists cclass_map($ch1)] || \ 184 $cclass_map($ch1) == 0) && \ 185 ![info exists exclusions($index)]} { 186 if {[info exists comp_first($ch1)]} { 187 incr comp_first($ch1) 188 } else { 189 set comp_first($ch1) 1 190 } 191 if {[info exists comp_second($ch2)]} { 192 incr comp_second($ch2) 193 } else { 194 set comp_second($ch2) 1 195 } 196 set comp_map($decomp) $index 197 } else { 198 #puts "Excluded $index" 199 } 200 } 201 default { 202 puts "Bad canonical decomposition: $line" 203 } 204 } 205 } 206 207 #puts "[format 0x%0.4x $index]\t$cclass\t$decomp_map($index)" 208 } 209 } 210 #puts [array get comp_first] 211 #puts [array get comp_second] 212} 213 214proc uni::buildTables {} { 215 variable cclass_shift 216 variable decomp_shift 217 variable comp_shift 218 219 variable cclass_map 220 variable cclass_pmap {} 221 variable cclass_pages {} 222 variable decomp_map 223 variable decomp_compat 224 variable decomp_pmap {} 225 variable decomp_pages {} 226 variable decomp_list {} 227 variable comp_map 228 variable comp_pmap {} 229 variable comp_pages {} 230 variable comp_first 231 variable comp_second 232 variable comp_first_list {} 233 variable comp_second_list {} 234 variable comp_x_list {} 235 variable comp_y_list {} 236 variable comp_both_map {} 237 238 set cclass_info {} 239 set decomp_info {} 240 set comp_info {} 241 242 set cclass_mask [expr {(1 << $cclass_shift) - 1}] 243 set decomp_mask [expr {(1 << $decomp_shift) - 1}] 244 set comp_mask [expr {(1 << $comp_shift) - 1}] 245 246 foreach comp [array names comp_map] { 247 set ch1 [lindex $comp 0] 248 if {[info exists comp_first($ch1)] && $comp_first($ch1) > 0 && \ 249 [info exists comp_second($ch1)] && $comp_second($ch1) > 0} { 250 if {[lsearch -exact $comp_x_list $ch1] < 0} { 251 set i [llength $comp_x_list] 252 lappend comp_x_list $ch1 253 set comp_info_map($ch1) $i 254 lappend comp_y_list $ch1 255 set comp_info_map($ch1) $i 256 puts "There should be no symbols which appears on" 257 puts "both first and second place in composition" 258 exit 1 259 } 260 } 261 } 262 263 foreach comp [array names comp_map] { 264 set ch1 [lindex $comp 0] 265 set ch2 [lindex $comp 1] 266 267 if {$comp_first($ch1) == 1 && ![info exists comp_second($ch1)]} { 268 set i [llength $comp_first_list] 269 lappend comp_first_list [list $ch2 $comp_map($comp)] 270 set comp_info_map($ch1) [expr {$i | (1 << 16)}] 271 } elseif {$comp_second($ch2) == 1 && ![info exists comp_first($ch2)]} { 272 set i [llength $comp_second_list] 273 lappend comp_second_list [list $ch1 $comp_map($comp)] 274 set comp_info_map($ch2) [expr {$i | (1 << 16) | (1 << 17)}] 275 } else { 276 if {[lsearch -exact $comp_x_list $ch1] < 0} { 277 set i [llength $comp_x_list] 278 lappend comp_x_list $ch1 279 set comp_info_map($ch1) $i 280 } 281 if {[lsearch -exact $comp_y_list $ch2] < 0} { 282 set i [llength $comp_y_list] 283 lappend comp_y_list $ch2 284 set comp_info_map($ch2) [expr {$i | (1 << 17)}] 285 } 286 } 287 } 288 289 set next 0 290 291 for {set i 0} {$i <= 0x10ffff} {incr i} { 292 #set gIndex [getGroup [getValue $i]] 293 294 set cclass_offset [expr {$i & $cclass_mask}] 295 296 if {[info exists cclass_map($i)]} { 297 set cclass $cclass_map($i) 298 } else { 299 set cclass 0 300 } 301 lappend cclass_info $cclass 302 303 if {$cclass_offset == $cclass_mask} { 304 addPage cclass_pmap cclass_pages $cclass_info 305 set cclass_info {} 306 } 307 308 309 set decomp_offset [expr {$i & $decomp_mask}] 310 311 if {[info exists decomp_map($i)]} { 312 set decomp $decomp_map($i) 313 if {[llength $decomp] > (1 << 14)} { 314 puts "Too long decomp for $i" 315 exit 1 316 } 317 318 if {[info exists decomp_used($decomp)]} { 319 lappend decomp_info [expr {$decomp_used($decomp) | ($decomp_compat($i) << 16)}] 320 } else { 321 set val [expr {([llength $decomp] << 17) + \ 322 [llength $decomp_list]}] 323 set decomp_used($decomp) $val 324 lappend decomp_info [expr {$val | ($decomp_compat($i) << 16)}] 325 #puts "$val $decomp" 326 foreach d $decomp { 327 lappend decomp_list $d 328 } 329 } 330 } else { 331 lappend decomp_info -1 332 } 333 334 if {$decomp_offset == $decomp_mask} { 335 addPage decomp_pmap decomp_pages $decomp_info 336 set decomp_info {} 337 } 338 339 340 set comp_offset [expr {$i & $comp_mask}] 341 342 if {[info exists comp_info_map($i)]} { 343 set comp $comp_info_map($i) 344 } else { 345 set comp -1 346 } 347 lappend comp_info $comp 348 349 if {$comp_offset == $comp_mask} { 350 addPage comp_pmap comp_pages $comp_info 351 set comp_info {} 352 } 353 } 354 355 #puts [array get decomp_map] 356 #puts $decomp_list 357 358 return 359} 360 361proc uni::main {} { 362 global argc argv0 argv 363 variable cclass_shift 364 variable cclass_pmap 365 variable cclass_pages 366 variable decomp_shift 367 variable decomp_pmap 368 variable decomp_pages 369 variable decomp_list 370 variable comp_shift 371 variable comp_map 372 variable comp_pmap 373 variable comp_pages 374 variable comp_first_list 375 variable comp_second_list 376 variable comp_x_list 377 variable comp_y_list 378 variable pages 379 variable groups {} 380 variable titleCount 381 382 if {$argc != 3} { 383 puts stderr "\nusage: $argv0 <datafile> <exclusionsfile> <outdir>\n" 384 exit 1 385 } 386 set f [open [lindex $argv 1] r] 387 set data [read $f] 388 close $f 389 390 load_exclusions $data 391 392 set f [open [lindex $argv 0] r] 393 set data [read $f] 394 close $f 395 396 load_tables $data 397 buildTables 398 #puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]" 399 #set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}] 400 #puts "shift = 6, space = $size" 401 #puts "title case count = $titleCount" 402 403 set f [open [file join [lindex $argv 2] unicode_data.tcl] w] 404 fconfigure $f -translation lf 405 puts $f \ 406"# unicode_data.tcl -- 407# 408# Declarations of Unicode character information tables. This file is 409# automatically generated by the gen_unicode_data.tcl script. Do not 410# modify this file by hand. 411# 412# Copyright (c) 1998 Scriptics Corporation. 413# Copyright (c) 2007 Alexey Shchepin 414# Copyright (c) 2007 Sergei Golovan 415# 416# See the file \"license.terms\" for information on usage and redistribution 417# of this file, and for a DISCLAIMER OF ALL WARRANTIES. 418# 419# RCS: @(#) \$Id\$ 420 421# 422# A 16-bit Unicode character is split into two parts in order to index 423# into the following tables. The lower CCLASS_OFFSET_BITS comprise an offset 424# into a page of characters. The upper bits comprise the page number. 425# 426 427package provide unicode::data 1.0.0 428 429namespace eval ::unicode::data { 430 431set CCLASS_OFFSET_BITS $cclass_shift 432 433# 434# The cclassPageMap is indexed by page number and returns an alternate page number 435# that identifies a unique page of characters. Many Unicode characters map 436# to the same alternate page number. 437# 438 439array unset cclassPageMap 440array set cclassPageMap \[list \\" 441 array unset tmp 442 foreach idx $cclass_pmap { 443 if {![info exists tmp($idx)]} { 444 set tmp($idx) 1 445 } else { 446 incr tmp($idx) 447 } 448 } 449 set max 0 450 set max_id 0 451 foreach idx [array names tmp] { 452 if {$tmp($idx) > $max} { 453 set max $tmp($idx) 454 set max_id $idx 455 } 456 } 457 set line " " 458 set last [expr {[llength $cclass_pmap] - 1}] 459 for {set i 0} {$i <= $last} {incr i} { 460 set num [lindex $cclass_pmap $i] 461 if {$num != $max_id} { 462 append line " $i $num" 463 } 464 if {[string length $line] > 70} { 465 puts $f "$line \\" 466 set line " " 467 } 468 } 469 puts $f "$line\] 470 471set CCLASS_COMMON_PAGE_MAP $max_id 472 473# 474# The cclassGroupMap is indexed by combining the alternate page number with 475# the page offset and returns a combining class number. 476# 477 478set cclassGroupMap \[list \\" 479 set line " " 480 set lasti [expr {[llength $cclass_pages] - 1}] 481 for {set i 0} {$i <= $lasti} {incr i} { 482 set page [lindex $cclass_pages $i] 483 set lastj [expr {[llength $page] - 1}] 484 for {set j 0} {$j <= $lastj} {incr j} { 485 append line [lindex $page $j] 486 if {$j != $lastj || $i != $lasti} { 487 append line " " 488 } 489 if {[string length $line] > 70} { 490 puts $f "$line\\" 491 set line " " 492 } 493 } 494 } 495 puts $f "$line\] 496 497proc GetUniCharCClass {uc} { 498 variable CCLASS_OFFSET_BITS 499 variable CCLASS_COMMON_PAGE_MAP 500 variable cclassPageMap 501 variable cclassGroupMap 502 503 set page \[expr {(\$uc & 0x1fffff) >> \$CCLASS_OFFSET_BITS}\] 504 if {\[info exists cclassPageMap(\$page)\]} { 505 set apage \$cclassPageMap(\$page) 506 } else { 507 set apage \$CCLASS_COMMON_PAGE_MAP 508 } 509 510 lindex \$cclassGroupMap \\ 511 \[expr {(\$apage << \$CCLASS_OFFSET_BITS) | \\ 512 (\$uc & ((1 << \$CCLASS_OFFSET_BITS) - 1))}\] 513} 514 515 516set DECOMP_OFFSET_BITS $decomp_shift 517 518# 519# The pageMap is indexed by page number and returns an alternate page number 520# that identifies a unique page of characters. Many Unicode characters map 521# to the same alternate page number. 522# 523 524array unset decompPageMap 525array set decompPageMap \[list \\" 526 array unset tmp 527 foreach idx $decomp_pmap { 528 if {![info exists tmp($idx)]} { 529 set tmp($idx) 1 530 } else { 531 incr tmp($idx) 532 } 533 } 534 set max 0 535 set max_id 0 536 foreach idx [array names tmp] { 537 if {$tmp($idx) > $max} { 538 set max $tmp($idx) 539 set max_id $idx 540 } 541 } 542 set line " " 543 set last [expr {[llength $decomp_pmap] - 1}] 544 for {set i 0} {$i <= $last} {incr i} { 545 set num [lindex $decomp_pmap $i] 546 if {$num != $max_id} { 547 append line " $i $num" 548 } 549 if {[string length $line] > 70} { 550 puts $f "$line \\" 551 set line " " 552 } 553 } 554 puts $f "$line\] 555 556set DECOMP_COMMON_PAGE_MAP $max_id 557 558# 559# The decompGroupMap is indexed by combining the alternate page number with 560# the page offset and returns a group number that identifies a length and 561# shift of decomposition sequence in decompList 562# 563 564set decompGroupMap \[list \\" 565 set line " " 566 set lasti [expr {[llength $decomp_pages] - 1}] 567 for {set i 0} {$i <= $lasti} {incr i} { 568 set page [lindex $decomp_pages $i] 569 set lastj [expr {[llength $page] - 1}] 570 for {set j 0} {$j <= $lastj} {incr j} { 571 append line [lindex $page $j] 572 if {$j != $lastj || $i != $lasti} { 573 append line " " 574 } 575 if {[string length $line] > 70} { 576 puts $f "$line\\" 577 set line " " 578 } 579 } 580 } 581 puts $f "$line\] 582 583# 584# List of decomposition sequences 585# 586 587set decompList \[list \\" 588 set line " " 589 set last [expr {[llength $decomp_list] - 1}] 590 for {set i 0} {$i <= $last} {incr i} { 591 set val [lindex $decomp_list $i] 592 593 append line [format "%d" $val] 594 if {$i != $last} { 595 append line " " 596 } 597 if {[string length $line] > 70} { 598 puts $f "$line\\" 599 set line " " 600 } 601 } 602 puts $f "$line\] 603 604set DECOMP_COMPAT_MASK [expr {1 << 16}] 605set DECOMP_INFO_BITS 17 606 607# 608# This macro extracts the information about a character from the 609# Unicode character tables. 610# 611 612proc GetUniCharDecompCompatInfo {uc} { 613 variable DECOMP_OFFSET_BITS 614 variable DECOMP_COMMON_PAGE_MAP 615 variable decompPageMap 616 variable decompGroupMap 617 618 set page \[expr {(\$uc & 0x1fffff) >> \$DECOMP_OFFSET_BITS}\] 619 if {\[info exists decompPageMap(\$page)\]} { 620 set apage \$decompPageMap(\$page) 621 } else { 622 set apage \$DECOMP_COMMON_PAGE_MAP 623 } 624 625 lindex \$decompGroupMap \\ 626 \[expr {(\$apage << \$DECOMP_OFFSET_BITS) | \\ 627 (\$uc & ((1 << \$DECOMP_OFFSET_BITS) - 1))}\] 628} 629 630proc GetUniCharDecompInfo {uc} { 631 variable DECOMP_COMPAT_MASK 632 633 set info \[GetUniCharDecompCompatInfo \$uc\] 634 if {\$info & \$DECOMP_COMPAT_MASK} { 635 return -1 636 } else { 637 return \$info 638 } 639} 640 641proc GetDecompList {info} { 642 variable DECOMP_INFO_BITS 643 variable decompList 644 645 set decomp_len \[expr {\$info >> \$DECOMP_INFO_BITS}\] 646 set decomp_shift \[expr {\$info & ((1 << (\$DECOMP_INFO_BITS - 1)) - 1)}\] 647 648 lrange \$decompList \$decomp_shift \[expr {\$decomp_shift + \$decomp_len - 1}\] 649} 650 651set COMP_OFFSET_BITS $comp_shift 652 653# 654# The pageMap is indexed by page number and returns an alternate page number 655# that identifies a unique page of characters. Many Unicode characters map 656# to the same alternate page number. 657# 658 659array unset compPageMap 660array set compPageMap \[list \\" 661 array unset tmp 662 foreach idx $comp_pmap { 663 if {![info exists tmp($idx)]} { 664 set tmp($idx) 1 665 } else { 666 incr tmp($idx) 667 } 668 } 669 set max 0 670 set max_id 0 671 foreach idx [array names tmp] { 672 if {$tmp($idx) > $max} { 673 set max $tmp($idx) 674 set max_id $idx 675 } 676 } 677 set line " " 678 set last [expr {[llength $comp_pmap] - 1}] 679 for {set i 0} {$i <= $last} {incr i} { 680 set num [lindex $comp_pmap $i] 681 if {$num != $max_id} { 682 append line " $i $num" 683 } 684 if {[string length $line] > 70} { 685 puts $f "$line \\" 686 set line " " 687 } 688 } 689 puts $f "$line\] 690 691set COMP_COMMON_PAGE_MAP $max_id 692 693# 694# The groupMap is indexed by combining the alternate page number with 695# the page offset and returns a group number that identifies a unique 696# set of character attributes. 697# 698 699set compGroupMap \[list \\" 700 set line " " 701 set lasti [expr {[llength $comp_pages] - 1}] 702 for {set i 0} {$i <= $lasti} {incr i} { 703 set page [lindex $comp_pages $i] 704 set lastj [expr {[llength $page] - 1}] 705 for {set j 0} {$j <= $lastj} {incr j} { 706 append line [lindex $page $j] 707 if {$j != $lastj || $i != $lasti} { 708 append line " " 709 } 710 if {[string length $line] > 70} { 711 puts $f "$line\\" 712 set line " " 713 } 714 } 715 } 716 puts $f "$line\] 717 718# 719# Lists of compositions for characters that appears only in one composition 720# 721 722set compFirstList \[list \\" 723 set line " " 724 set last [expr {[llength $comp_first_list] - 1}] 725 for {set i 0} {$i <= $last} {incr i} { 726 set val [lindex $comp_first_list $i] 727 728 append line [format "{%d %d}" [lindex $val 0] [lindex $val 1]] 729 if {$i != $last} { 730 append line " " 731 } 732 if {[string length $line] > 60} { 733 puts $f "$line\\" 734 set line " " 735 } 736 } 737 puts $f "$line\] 738 739set compSecondList \[list \\" 740 set line " " 741 set last [expr {[llength $comp_second_list] - 1}] 742 for {set i 0} {$i <= $last} {incr i} { 743 set val [lindex $comp_second_list $i] 744 745 append line [format "{%d %d}" [lindex $val 0] [lindex $val 1]] 746 if {$i != $last} { 747 append line " " 748 } 749 if {[string length $line] > 60} { 750 puts $f "$line\\" 751 set line " " 752 } 753 } 754 puts $f "$line\] 755 756# 757# Compositions matrix 758# 759 760array unset compBothMap 761array set compBothMap \[list \\" 762 set lastx [expr {[llength $comp_x_list] - 1}] 763 set lasty [expr {[llength $comp_y_list] - 1}] 764 set line " " 765 for {set i 0} {$i <= $lastx} {incr i} { 766 for {set j 0} {$j <= $lasty} {incr j} { 767 set comp [list [lindex $comp_x_list $i] [lindex $comp_y_list $j]] 768 if {[info exists comp_map($comp)]} { 769 append line " " [expr {$i*[llength $comp_x_list]+$j}] \ 770 " " [format "%d" $comp_map($comp)] 771 } 772 if {[string length $line] > 70} { 773 puts $f "$line \\" 774 set line " " 775 } 776 } 777 } 778 puts $f "$line\] 779 780 781proc GetUniCharCompInfo {uc} { 782 variable COMP_OFFSET_BITS 783 variable COMP_COMMON_PAGE_MAP 784 variable compPageMap 785 variable compGroupMap 786 787 set page \[expr {(\$uc & 0x1fffff) >> \$COMP_OFFSET_BITS}\] 788 if {\[info exists compPageMap(\$page)\]} { 789 set apage \$compPageMap(\$page) 790 } else { 791 set apage \$COMP_COMMON_PAGE_MAP 792 } 793 794 lindex \$compGroupMap \\ 795 \[expr {(\$apage << \$COMP_OFFSET_BITS) | \\ 796 (\$uc & ((1 << \$COMP_OFFSET_BITS) - 1))}\] 797} 798 799set COMP_SINGLE_MASK [expr {1 << 16}] 800set COMP_SECOND_MASK [expr {1 << 17}] 801set COMP_MASK [expr {(1 << 16) - 1}] 802set COMP_LENGTH1 [llength $comp_x_list] 803 804proc GetCompFirst {uc info} { 805 variable COMP_SINGLE_MASK 806 variable COMP_SECOND_MASK 807 variable COMP_MASK 808 variable compFirstList 809 810 if {\$info == -1 || !(\$info & \$COMP_SINGLE_MASK)} { 811 return -1 812 } 813 if {!(\$info & \$COMP_SECOND_MASK)} { 814 set comp \[lindex \$compFirstList \[expr {\$info & \$COMP_MASK}\]\] 815 if {\$uc == \[lindex \$comp 0\]} { 816 return \[lindex \$comp 1\] 817 } 818 } 819 return 0 820} 821 822proc GetCompSecond {uc info} { 823 variable COMP_SINGLE_MASK 824 variable COMP_SECOND_MASK 825 variable COMP_MASK 826 variable compSecondList 827 828 if {\$info == -1 || !(\$info & \$COMP_SINGLE_MASK)} { 829 return -1 830 } 831 if {\$info & \$COMP_SECOND_MASK} { 832 set comp \[lindex \$compSecondList \[expr {\$info & \$COMP_MASK}\]\] 833 if {\$uc == \[lindex \$comp 0\]} { 834 return \[lindex \$comp 1\] 835 } 836 } 837 return 0 838} 839 840proc GetCompBoth {info1 info2} { 841 variable COMP_SECOND_MASK 842 variable COMP_MASK 843 variable COMP_LENGTH1 844 variable compBothMap 845 846 if {\$info1 != -1 && \$info2 != -1 && \ 847 !(\$info1 & \$COMP_SECOND_MASK) && (\$info2 & \$COMP_SECOND_MASK)} { 848 set idx \[expr {\$COMP_LENGTH1 * \$info1 + (\$info2 & \$COMP_MASK)}\] 849 if {\[info exists compBothMap(\$idx)\]} { 850 return \$compBothMap(\$idx) 851 } else { 852 return 0 853 } 854 } else { 855 return 0 856 } 857} 858 859} ; # namespace eval ::unicode::data 860" 861 862 close $f 863} 864 865uni::main 866 867return 868