1#!/usr/bin/env tclsh
2
3# gen_unicode_data.tcl --
4#
5#	This program parses the UnicodeData files and generates the
6#	corresponding unicode_data.tcl file with compressed character
7#	data tables.  The input to this program should be
8#	UnicodeData.txt and CompositionExclusions.txt files
9#	from: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
10#	and ftp://ftp.unicode.org/Public/UNIDATA/CompositionExclusions.txt
11#
12# Copyright (c) 1998-1999 by Scriptics Corporation.
13# All rights reserved.
14#
15# Modified for ejabberd by Alexey Shchepin
16# Modified for Tcl stringprep by Sergei Golovan
17#
18# Usage: gen_unicode_data.tcl infile1 infile2 outdir
19#
20# RCS: @(#) $Id: gen_unicode_data.tcl,v 1.1 2008/01/29 02:18:10 patthoyts Exp $
21
22
23namespace eval uni {
24    set cclass_shift 2
25    set decomp_shift 3
26    set comp_shift 1
27    set shift 5;		# number of bits of data within a page
28				# This value can be adjusted to find the
29				# best split to minimize table size
30
31    variable pMap;		# map from page to page index, each entry is
32				# an index into the pages table, indexed by
33				# page number
34    variable pages;		# map from page index to page info, each
35				# entry is a list of indices into the groups
36				# table, the list is indexed by the offset
37    variable groups;		# list of character info values, indexed by
38				# group number, initialized with the
39				# unassigned character group
40
41    variable categories {
42	Cn Lu Ll Lt Lm Lo Mn Me Mc Nd Nl No Zs Zl Zp
43	Cc Cf Co Cs Pc Pd Ps Pe Pi Pf Po Sm Sc Sk So
44    };				# Ordered list of character categories, must
45				# match the enumeration in the header file.
46
47    variable titleCount 0;	# Count of the number of title case
48				# characters.  This value is used in the
49				# regular expression code to allocate enough
50				# space for the title case variants.
51}
52
53proc uni::getValue {items index} {
54    variable categories
55    variable titleCount
56
57    # Extract character info
58
59    set category [lindex $items 2]
60    if {[scan [lindex $items 12] %4x toupper] == 1} {
61	set toupper [expr {$index - $toupper}]
62    } else {
63	set toupper {}
64    }
65    if {[scan [lindex $items 13] %4x tolower] == 1} {
66	set tolower [expr {$tolower - $index}]
67    } else {
68	set tolower {}
69    }
70    if {[scan [lindex $items 14] %4x totitle] == 1} {
71	set totitle [expr {$index - $totitle}]
72    } else {
73	set totitle {}
74    }
75
76    set categoryIndex [lsearch -exact $categories $category]
77    if {$categoryIndex < 0} {
78	puts "Unexpected character category: $index($category)"
79	set categoryIndex 0
80    } elseif {$category == "Lt"} {
81	incr titleCount
82    }
83
84    return "$categoryIndex,$toupper,$tolower,$totitle"
85}
86
87proc uni::getGroup {value} {
88    variable groups
89
90    set gIndex [lsearch -exact $groups $value]
91    if {$gIndex == -1} {
92	set gIndex [llength $groups]
93	lappend groups $value
94    }
95    return $gIndex
96}
97
98proc uni::addPage {info} {
99    variable pMap
100    variable pages
101
102    set pIndex [lsearch -exact $pages $info]
103    if {$pIndex == -1} {
104	set pIndex [llength $pages]
105	lappend pages $info
106    }
107    lappend pMap $pIndex
108    return
109}
110
111proc uni::addPage {map_var pages_var info} {
112    variable $map_var
113    variable $pages_var
114
115    set pIndex [lsearch -exact [set $pages_var] $info]
116    if {$pIndex == -1} {
117	set pIndex [llength [set $pages_var]]
118	lappend $pages_var $info
119    }
120    lappend $map_var $pIndex
121    return
122}
123
124proc uni::load_exclusions {data} {
125    variable exclusions
126
127    foreach line [split $data \n] {
128	if {$line == ""} continue
129
130	set items [split $line " "]
131
132	if {[lindex $items 0] == "#"} continue
133
134	scan [lindex $items 0] %x index
135
136	set exclusions($index) ""
137    }
138}
139
140proc uni::load_tables {data} {
141    variable cclass_map
142    variable decomp_map
143    variable decomp_compat
144    variable comp_map
145    variable comp_first
146    variable comp_second
147    variable exclusions
148
149    foreach line [split $data \n] {
150	if {$line == ""} continue
151
152	set items [split $line \;]
153
154	scan [lindex $items 0] %x index
155	set cclass [lindex $items 3]
156	set decomp [lindex $items 5]
157
158	set cclass_map($index) $cclass
159	#set decomp_map($index) $cclass
160
161	if {$decomp != ""} {
162	    set decomp_compat($index) 0
163	    if {[string index [lindex $decomp 0] 0] == "<"} {
164		set decomp_compat($index) 1
165		set decomp1 [lreplace $decomp 0 0]
166		set decomp {}
167		foreach ch $decomp1 {
168		    scan $ch %x ch
169		    lappend decomp $ch
170		}
171		set decomp_map($index) $decomp
172	    } else {
173		switch -- [llength $decomp] {
174		    1 {
175			scan $decomp %x ch
176			set decomp_map($index) $ch
177		    }
178		    2 {
179			scan $decomp "%x %x" ch1 ch2
180			set decomp [list $ch1 $ch2]
181			set decomp_map($index) $decomp
182			# hackish
183			if {(![info exists cclass_map($ch1)] || \
184				 $cclass_map($ch1) == 0) && \
185				![info exists exclusions($index)]} {
186			    if {[info exists comp_first($ch1)]} {
187				incr comp_first($ch1)
188			    } else {
189				set comp_first($ch1) 1
190			    }
191			    if {[info exists comp_second($ch2)]} {
192				incr comp_second($ch2)
193			    } else {
194				set comp_second($ch2) 1
195			    }
196			    set comp_map($decomp) $index
197			} else {
198			    #puts "Excluded $index"
199			}
200		    }
201		    default {
202			puts "Bad canonical decomposition: $line"
203		    }
204		}
205	    }
206
207	    #puts "[format 0x%0.4x $index]\t$cclass\t$decomp_map($index)"
208	}
209    }
210    #puts [array get comp_first]
211    #puts [array get comp_second]
212}
213
214proc uni::buildTables {} {
215    variable cclass_shift
216    variable decomp_shift
217    variable comp_shift
218
219    variable cclass_map
220    variable cclass_pmap {}
221    variable cclass_pages {}
222    variable decomp_map
223    variable decomp_compat
224    variable decomp_pmap {}
225    variable decomp_pages {}
226    variable decomp_list {}
227    variable comp_map
228    variable comp_pmap {}
229    variable comp_pages {}
230    variable comp_first
231    variable comp_second
232    variable comp_first_list {}
233    variable comp_second_list {}
234    variable comp_x_list {}
235    variable comp_y_list {}
236    variable comp_both_map {}
237
238    set cclass_info {}
239    set decomp_info {}
240    set comp_info {}
241
242    set cclass_mask [expr {(1 << $cclass_shift) - 1}]
243    set decomp_mask [expr {(1 << $decomp_shift) - 1}]
244    set comp_mask [expr {(1 << $comp_shift) - 1}]
245
246    foreach comp [array names comp_map] {
247	set ch1 [lindex $comp 0]
248	if {[info exists comp_first($ch1)] && $comp_first($ch1) > 0 && \
249		[info exists comp_second($ch1)] && $comp_second($ch1) > 0} {
250	    if {[lsearch -exact $comp_x_list $ch1] < 0} {
251		set i [llength $comp_x_list]
252		lappend comp_x_list $ch1
253		set comp_info_map($ch1) $i
254		lappend comp_y_list $ch1
255		set comp_info_map($ch1) $i
256		puts "There should be no symbols which appears on"
257		puts "both first and second place in composition"
258		exit 1
259	    }
260	}
261    }
262
263    foreach comp [array names comp_map] {
264	set ch1 [lindex $comp 0]
265	set ch2 [lindex $comp 1]
266
267	if {$comp_first($ch1) == 1 && ![info exists comp_second($ch1)]} {
268	    set i [llength $comp_first_list]
269	    lappend comp_first_list [list $ch2 $comp_map($comp)]
270	    set comp_info_map($ch1) [expr {$i | (1 << 16)}]
271	} elseif {$comp_second($ch2) == 1 && ![info exists comp_first($ch2)]} {
272	    set i [llength $comp_second_list]
273	    lappend comp_second_list [list $ch1 $comp_map($comp)]
274	    set comp_info_map($ch2) [expr {$i | (1 << 16) | (1 << 17)}]
275	} else {
276	    if {[lsearch -exact $comp_x_list $ch1] < 0} {
277		set i [llength $comp_x_list]
278		lappend comp_x_list $ch1
279		set comp_info_map($ch1) $i
280	    }
281	    if {[lsearch -exact $comp_y_list $ch2] < 0} {
282		set i [llength $comp_y_list]
283		lappend comp_y_list $ch2
284		set comp_info_map($ch2) [expr {$i | (1 << 17)}]
285	    }
286	}
287    }
288
289    set next 0
290
291    for {set i 0} {$i <= 0x10ffff} {incr i} {
292	#set gIndex [getGroup [getValue $i]]
293
294	set cclass_offset [expr {$i & $cclass_mask}]
295
296	if {[info exists cclass_map($i)]} {
297	    set cclass $cclass_map($i)
298	} else {
299	    set cclass 0
300	}
301	lappend cclass_info $cclass
302
303	if {$cclass_offset == $cclass_mask} {
304	    addPage cclass_pmap cclass_pages $cclass_info
305	    set cclass_info {}
306	}
307
308
309	set decomp_offset [expr {$i & $decomp_mask}]
310
311	if {[info exists decomp_map($i)]} {
312	    set decomp $decomp_map($i)
313	    if {[llength $decomp] > (1 << 14)} {
314		puts "Too long decomp for $i"
315		exit 1
316	    }
317
318	    if {[info exists decomp_used($decomp)]} {
319		lappend decomp_info [expr {$decomp_used($decomp) | ($decomp_compat($i) << 16)}]
320	    } else {
321		set val [expr {([llength $decomp] << 17) + \
322				   [llength $decomp_list]}]
323		set decomp_used($decomp) $val
324		lappend decomp_info [expr {$val | ($decomp_compat($i) << 16)}]
325		#puts "$val $decomp"
326		foreach d $decomp {
327		    lappend decomp_list $d
328		}
329	    }
330	} else {
331	    lappend decomp_info -1
332	}
333
334	if {$decomp_offset == $decomp_mask} {
335	    addPage decomp_pmap decomp_pages $decomp_info
336	    set decomp_info {}
337	}
338
339
340	set comp_offset [expr {$i & $comp_mask}]
341
342	if {[info exists comp_info_map($i)]} {
343	    set comp $comp_info_map($i)
344	} else {
345	    set comp -1
346	}
347	lappend comp_info $comp
348
349	if {$comp_offset == $comp_mask} {
350	    addPage comp_pmap comp_pages $comp_info
351	    set comp_info {}
352	}
353    }
354
355    #puts [array get decomp_map]
356    #puts $decomp_list
357
358    return
359}
360
361proc uni::main {} {
362    global argc argv0 argv
363    variable cclass_shift
364    variable cclass_pmap
365    variable cclass_pages
366    variable decomp_shift
367    variable decomp_pmap
368    variable decomp_pages
369    variable decomp_list
370    variable comp_shift
371    variable comp_map
372    variable comp_pmap
373    variable comp_pages
374    variable comp_first_list
375    variable comp_second_list
376    variable comp_x_list
377    variable comp_y_list
378    variable pages
379    variable groups {}
380    variable titleCount
381
382    if {$argc != 3} {
383	puts stderr "\nusage: $argv0 <datafile> <exclusionsfile> <outdir>\n"
384	exit 1
385    }
386    set f [open [lindex $argv 1] r]
387    set data [read $f]
388    close $f
389
390    load_exclusions $data
391
392    set f [open [lindex $argv 0] r]
393    set data [read $f]
394    close $f
395
396    load_tables $data
397    buildTables
398    #puts "X = [llength $pMap]  Y= [llength $pages]  A= [llength $groups]"
399    #set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}]
400    #puts "shift = 6, space = $size"
401    #puts "title case count = $titleCount"
402
403    set f [open [file join [lindex $argv 2] unicode_data.tcl] w]
404    fconfigure $f -translation lf
405    puts $f \
406"# unicode_data.tcl --
407#
408#	Declarations of Unicode character information tables.  This file is
409#	automatically generated by the gen_unicode_data.tcl script.  Do not
410#	modify this file by hand.
411#
412# Copyright (c) 1998 Scriptics Corporation.
413# Copyright (c) 2007 Alexey Shchepin
414# Copyright (c) 2007 Sergei Golovan
415#
416# See the file \"license.terms\" for information on usage and redistribution
417# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
418#
419# RCS: @(#) \$Id\$
420
421#
422# A 16-bit Unicode character is split into two parts in order to index
423# into the following tables.  The lower CCLASS_OFFSET_BITS comprise an offset
424# into a page of characters.  The upper bits comprise the page number.
425#
426
427package provide unicode::data 1.0.0
428
429namespace eval ::unicode::data {
430
431set CCLASS_OFFSET_BITS $cclass_shift
432
433#
434# The cclassPageMap is indexed by page number and returns an alternate page number
435# that identifies a unique page of characters.  Many Unicode characters map
436# to the same alternate page number.
437#
438
439array unset cclassPageMap
440array set cclassPageMap \[list \\"
441    array unset tmp
442    foreach idx $cclass_pmap {
443	if {![info exists tmp($idx)]} {
444	    set tmp($idx) 1
445	} else {
446	    incr tmp($idx)
447	}
448    }
449    set max 0
450    set max_id 0
451    foreach idx [array names tmp] {
452	if {$tmp($idx) > $max} {
453	    set max $tmp($idx)
454	    set max_id $idx
455	}
456    }
457    set line "   "
458    set last [expr {[llength $cclass_pmap] - 1}]
459    for {set i 0} {$i <= $last} {incr i} {
460	set num [lindex $cclass_pmap $i]
461	if {$num != $max_id} {
462	    append line " $i $num"
463	}
464	if {[string length $line] > 70} {
465	    puts $f "$line \\"
466	    set line "   "
467	}
468    }
469    puts $f "$line\]
470
471set CCLASS_COMMON_PAGE_MAP $max_id
472
473#
474# The cclassGroupMap is indexed by combining the alternate page number with
475# the page offset and returns a combining class number.
476#
477
478set cclassGroupMap \[list \\"
479    set line "    "
480    set lasti [expr {[llength $cclass_pages] - 1}]
481    for {set i 0} {$i <= $lasti} {incr i} {
482	set page [lindex $cclass_pages $i]
483	set lastj [expr {[llength $page] - 1}]
484	for {set j 0} {$j <= $lastj} {incr j} {
485	    append line [lindex $page $j]
486	    if {$j != $lastj || $i != $lasti} {
487		append line " "
488	    }
489	    if {[string length $line] > 70} {
490		puts $f "$line\\"
491		set line "    "
492	    }
493	}
494    }
495    puts $f "$line\]
496
497proc GetUniCharCClass {uc} {
498    variable CCLASS_OFFSET_BITS
499    variable CCLASS_COMMON_PAGE_MAP
500    variable cclassPageMap
501    variable cclassGroupMap
502
503    set page \[expr {(\$uc & 0x1fffff) >> \$CCLASS_OFFSET_BITS}\]
504    if {\[info exists cclassPageMap(\$page)\]} {
505	set apage \$cclassPageMap(\$page)
506    } else {
507	set apage \$CCLASS_COMMON_PAGE_MAP
508    }
509
510    lindex \$cclassGroupMap \\
511	   \[expr {(\$apage << \$CCLASS_OFFSET_BITS) | \\
512		   (\$uc & ((1 << \$CCLASS_OFFSET_BITS) - 1))}\]
513}
514
515
516set DECOMP_OFFSET_BITS $decomp_shift
517
518#
519# The pageMap is indexed by page number and returns an alternate page number
520# that identifies a unique page of characters.  Many Unicode characters map
521# to the same alternate page number.
522#
523
524array unset decompPageMap
525array set decompPageMap \[list \\"
526    array unset tmp
527    foreach idx $decomp_pmap {
528	if {![info exists tmp($idx)]} {
529	    set tmp($idx) 1
530	} else {
531	    incr tmp($idx)
532	}
533    }
534    set max 0
535    set max_id 0
536    foreach idx [array names tmp] {
537	if {$tmp($idx) > $max} {
538	    set max $tmp($idx)
539	    set max_id $idx
540	}
541    }
542    set line "   "
543    set last [expr {[llength $decomp_pmap] - 1}]
544    for {set i 0} {$i <= $last} {incr i} {
545	set num [lindex $decomp_pmap $i]
546	if {$num != $max_id} {
547	    append line " $i $num"
548	}
549	if {[string length $line] > 70} {
550	    puts $f "$line \\"
551	    set line "   "
552	}
553    }
554    puts $f "$line\]
555
556set DECOMP_COMMON_PAGE_MAP $max_id
557
558#
559# The decompGroupMap is indexed by combining the alternate page number with
560# the page offset and returns a group number that identifies a length and
561# shift of decomposition sequence in decompList
562#
563
564set decompGroupMap \[list \\"
565    set line "    "
566    set lasti [expr {[llength $decomp_pages] - 1}]
567    for {set i 0} {$i <= $lasti} {incr i} {
568	set page [lindex $decomp_pages $i]
569	set lastj [expr {[llength $page] - 1}]
570	for {set j 0} {$j <= $lastj} {incr j} {
571	    append line [lindex $page $j]
572	    if {$j != $lastj || $i != $lasti} {
573		append line " "
574	    }
575	    if {[string length $line] > 70} {
576		puts $f "$line\\"
577		set line "    "
578	    }
579	}
580    }
581    puts $f "$line\]
582
583#
584# List of decomposition sequences
585#
586
587set decompList \[list \\"
588    set line "    "
589    set last [expr {[llength $decomp_list] - 1}]
590    for {set i 0} {$i <= $last} {incr i} {
591	set val [lindex $decomp_list $i]
592
593	append line [format "%d" $val]
594	if {$i != $last} {
595	    append line " "
596	}
597	if {[string length $line] > 70} {
598	    puts $f "$line\\"
599	    set line "    "
600	}
601    }
602    puts $f "$line\]
603
604set DECOMP_COMPAT_MASK [expr {1 << 16}]
605set DECOMP_INFO_BITS 17
606
607#
608# This macro extracts the information about a character from the
609# Unicode character tables.
610#
611
612proc GetUniCharDecompCompatInfo {uc} {
613    variable DECOMP_OFFSET_BITS
614    variable DECOMP_COMMON_PAGE_MAP
615    variable decompPageMap
616    variable decompGroupMap
617
618    set page \[expr {(\$uc & 0x1fffff) >> \$DECOMP_OFFSET_BITS}\]
619    if {\[info exists decompPageMap(\$page)\]} {
620	set apage \$decompPageMap(\$page)
621    } else {
622	set apage \$DECOMP_COMMON_PAGE_MAP
623    }
624
625    lindex \$decompGroupMap \\
626	   \[expr {(\$apage << \$DECOMP_OFFSET_BITS) | \\
627		   (\$uc & ((1 << \$DECOMP_OFFSET_BITS) - 1))}\]
628}
629
630proc GetUniCharDecompInfo {uc} {
631    variable DECOMP_COMPAT_MASK
632
633    set info \[GetUniCharDecompCompatInfo \$uc\]
634    if {\$info & \$DECOMP_COMPAT_MASK} {
635	return -1
636    } else {
637	return \$info
638    }
639}
640
641proc GetDecompList {info} {
642    variable DECOMP_INFO_BITS
643    variable decompList
644
645    set decomp_len \[expr {\$info >> \$DECOMP_INFO_BITS}\]
646    set decomp_shift \[expr {\$info & ((1 << (\$DECOMP_INFO_BITS - 1)) - 1)}\]
647
648    lrange \$decompList \$decomp_shift \[expr {\$decomp_shift + \$decomp_len - 1}\]
649}
650
651set COMP_OFFSET_BITS $comp_shift
652
653#
654# The pageMap is indexed by page number and returns an alternate page number
655# that identifies a unique page of characters.  Many Unicode characters map
656# to the same alternate page number.
657#
658
659array unset compPageMap
660array set compPageMap \[list \\"
661    array unset tmp
662    foreach idx $comp_pmap {
663	if {![info exists tmp($idx)]} {
664	    set tmp($idx) 1
665	} else {
666	    incr tmp($idx)
667	}
668    }
669    set max 0
670    set max_id 0
671    foreach idx [array names tmp] {
672	if {$tmp($idx) > $max} {
673	    set max $tmp($idx)
674	    set max_id $idx
675	}
676    }
677    set line "   "
678    set last [expr {[llength $comp_pmap] - 1}]
679    for {set i 0} {$i <= $last} {incr i} {
680	set num [lindex $comp_pmap $i]
681	if {$num != $max_id} {
682	    append line " $i $num"
683	}
684	if {[string length $line] > 70} {
685	    puts $f "$line \\"
686	    set line "   "
687	}
688    }
689    puts $f "$line\]
690
691set COMP_COMMON_PAGE_MAP $max_id
692
693#
694# The groupMap is indexed by combining the alternate page number with
695# the page offset and returns a group number that identifies a unique
696# set of character attributes.
697#
698
699set compGroupMap \[list \\"
700    set line "    "
701    set lasti [expr {[llength $comp_pages] - 1}]
702    for {set i 0} {$i <= $lasti} {incr i} {
703	set page [lindex $comp_pages $i]
704	set lastj [expr {[llength $page] - 1}]
705	for {set j 0} {$j <= $lastj} {incr j} {
706	    append line [lindex $page $j]
707	    if {$j != $lastj || $i != $lasti} {
708		append line " "
709	    }
710	    if {[string length $line] > 70} {
711		puts $f "$line\\"
712		set line "    "
713	    }
714	}
715    }
716    puts $f "$line\]
717
718#
719# Lists of compositions for characters that appears only in one composition
720#
721
722set compFirstList \[list \\"
723    set line "    "
724    set last [expr {[llength $comp_first_list] - 1}]
725    for {set i 0} {$i <= $last} {incr i} {
726	set val [lindex $comp_first_list $i]
727
728	append line [format "{%d %d}" [lindex $val 0] [lindex $val 1]]
729	if {$i != $last} {
730	    append line " "
731	}
732	if {[string length $line] > 60} {
733	    puts $f "$line\\"
734	    set line "    "
735	}
736    }
737    puts $f "$line\]
738
739set compSecondList \[list \\"
740    set line "    "
741    set last [expr {[llength $comp_second_list] - 1}]
742    for {set i 0} {$i <= $last} {incr i} {
743	set val [lindex $comp_second_list $i]
744
745	append line [format "{%d %d}" [lindex $val 0] [lindex $val 1]]
746	if {$i != $last} {
747	    append line " "
748	}
749	if {[string length $line] > 60} {
750	    puts $f "$line\\"
751	    set line "    "
752	}
753    }
754    puts $f "$line\]
755
756#
757# Compositions matrix
758#
759
760array unset compBothMap
761array set compBothMap \[list \\"
762    set lastx [expr {[llength $comp_x_list] - 1}]
763    set lasty [expr {[llength $comp_y_list] - 1}]
764    set line "   "
765    for {set i 0} {$i <= $lastx} {incr i} {
766	for {set j 0} {$j <= $lasty} {incr j} {
767	    set comp [list [lindex $comp_x_list $i] [lindex $comp_y_list $j]]
768	    if {[info exists comp_map($comp)]} {
769		append line " " [expr {$i*[llength $comp_x_list]+$j}] \
770			    " " [format "%d" $comp_map($comp)]
771	    }
772	    if {[string length $line] > 70} {
773		puts $f "$line \\"
774		set line "   "
775	    }
776	}
777    }
778    puts $f "$line\]
779
780
781proc GetUniCharCompInfo {uc} {
782    variable COMP_OFFSET_BITS
783    variable COMP_COMMON_PAGE_MAP
784    variable compPageMap
785    variable compGroupMap
786
787    set page \[expr {(\$uc & 0x1fffff) >> \$COMP_OFFSET_BITS}\]
788    if {\[info exists compPageMap(\$page)\]} {
789	set apage \$compPageMap(\$page)
790    } else {
791	set apage \$COMP_COMMON_PAGE_MAP
792    }
793
794    lindex \$compGroupMap \\
795	   \[expr {(\$apage << \$COMP_OFFSET_BITS) | \\
796		   (\$uc & ((1 << \$COMP_OFFSET_BITS) - 1))}\]
797}
798
799set COMP_SINGLE_MASK [expr {1 << 16}]
800set COMP_SECOND_MASK [expr {1 << 17}]
801set COMP_MASK [expr {(1 << 16) - 1}]
802set COMP_LENGTH1 [llength $comp_x_list]
803
804proc GetCompFirst {uc info} {
805    variable COMP_SINGLE_MASK
806    variable COMP_SECOND_MASK
807    variable COMP_MASK
808    variable compFirstList
809
810    if {\$info == -1 || !(\$info & \$COMP_SINGLE_MASK)} {
811	return -1
812    }
813    if {!(\$info & \$COMP_SECOND_MASK)} {
814	set comp \[lindex \$compFirstList \[expr {\$info & \$COMP_MASK}\]\]
815	if {\$uc == \[lindex \$comp 0\]} {
816	    return \[lindex \$comp 1\]
817	}
818    }
819    return 0
820}
821
822proc GetCompSecond {uc info} {
823    variable COMP_SINGLE_MASK
824    variable COMP_SECOND_MASK
825    variable COMP_MASK
826    variable compSecondList
827
828    if {\$info == -1 || !(\$info & \$COMP_SINGLE_MASK)} {
829	return -1
830    }
831    if {\$info & \$COMP_SECOND_MASK} {
832	set comp \[lindex \$compSecondList \[expr {\$info & \$COMP_MASK}\]\]
833	if {\$uc == \[lindex \$comp 0\]} {
834	    return \[lindex \$comp 1\]
835	}
836    }
837    return 0
838}
839
840proc GetCompBoth {info1 info2} {
841    variable COMP_SECOND_MASK
842    variable COMP_MASK
843    variable COMP_LENGTH1
844    variable compBothMap
845
846    if {\$info1 != -1 && \$info2 != -1 && \
847       !(\$info1 & \$COMP_SECOND_MASK) && (\$info2 & \$COMP_SECOND_MASK)} {
848	set idx \[expr {\$COMP_LENGTH1 * \$info1 + (\$info2 & \$COMP_MASK)}\]
849	if {\[info exists compBothMap(\$idx)\]} {
850	    return \$compBothMap(\$idx)
851	} else {
852	    return 0
853	}
854    } else {
855	return 0
856    }
857}
858
859} ; # namespace eval ::unicode::data
860"
861
862    close $f
863}
864
865uni::main
866
867return
868